This notebook performs a type of location optimization analysis: finding the optimal locations of facilities on a network. Two types of Set-Coverage analysis are implemented in **Julia**:

### Set-Coverage Problem
#### Objective: Determine the minimum number of facilities and their locations in order to cover all demands within a pre-specified maximum distance (or time) coverage

### Partial Set-Coverage Problem
#### Objective: Determine the minimum number of facilities and their locations in order to cover a given fraction of the population within a pre-specified maximum distance (or time) coverage

More information on GOSTNets Optimization can be found in the wiki: https://github.com/worldbank/GOST_PublicGoods/wiki/GOSTnets-Optimization

#### This is a Julia Notebook. If you are new to Julia, these are the [steps](https://datatofish.com/add-julia-to-jupyter/) to add Julia to a Jupyter Notebook

In [1]:
using Pkg
# Install each package the notebook relies on, one Pkg.add call per package.
required_packages = (
    "JuMP",
    "Cbc",
    "MathOptInterface",
    "MathProgBase",
    "CSV",
    "DelimitedFiles",
    "DataFrames",
)
foreach(Pkg.add, required_packages)
println("Done installing packages")

[32m[1m  Updating[22m[39m registry at `~/.julia/registries/General`
[32m[1m  Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`
[32m[1m   Cloning[22m[39m [9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a] DataAPI from https://github.com/JuliaData/DataAPI.jl.git
[?25l    

┌ Info: Installed DataAPI ─ v1.0.1
└ @ Pkg.Operations /home/conda/feedstock_root/build_artifacts/julia_1548684429855/work/usr/share/julia/stdlib/v1.0/Pkg/src/Operations.jl:636














┌ Info: Installed Tables ── v0.2.11
└ @ Pkg.Operations /home/conda/feedstock_root/build_artifacts/julia_1548684429855/work/usr/share/julia/stdlib/v1.0/Pkg/src/Operations.jl:636


[32m[1m  Updating[22m[39m `~/.julia/environments/v1.0/Project.toml`
[90m [no changes][39m
[32m[1m  Updating[22m[39m `~/.julia/environments/v1.0/Manifest.toml`
 [90m [9a962f9c][39m[93m ↑ DataAPI v1.0.0 ⇒ v1.0.1[39m
 [90m [bd369af6][39m[93m ↑ Tables v0.2.10 ⇒ v0.2.11[39m
[32m[1m Resolving[22m[39m package versions...
[32m[1m  Updating[22m[39m `~/.julia/environments/v1.0/Project.toml`
[90m [no changes][39m
[32m[1m  Updating[22m[39m `~/.julia/environments/v1.0/Manifest.toml`
[90m [no changes][39m
[32m[1m Resolving[22m[39m package versions...
[32m[1m  Updating[22m[39m `~/.julia/environments/v1.0/Project.toml`
[90m [no changes][39m
[32m[1m  Updating[22m[39m `~/.julia/environments/v1.0/Manifest.toml`
[90m [no changes][39m
[32m[1m Resolving[22m[39m package versions...
[32m[1m  Updating[22m[39m `~/.julia/environments/v1.0/Project.toml`
[90m [no changes][39m
[32m[1m  Updating[22m[39m `~/.julia/environments/v1.0/Manifest.toml`
[90m [

In [2]:
#using JuMP, Cbc, GLPK, CPLEX, Test, Random, MathOptInterface, MathOptFormat, CSV, DataFrames, DelimitedFiles, MathProgBase
using JuMP, Cbc, MathOptInterface, CSV, DataFrames, DelimitedFiles, MathProgBase

┌ Info: Recompiling stale cache file /home/wb546254/.julia/compiled/v1.0/CSV/HHBkp.ji for CSV [336ed68f-0bac-5ca0-87d4-7b16caf5d00b]
└ @ Base loading.jl:1190


In [3]:
# MathOptInterface is an abstraction layer for mathematical optimization solvers.
# The short alias MOI is used further down when comparing solver termination
# statuses (MOI.OPTIMAL, MOI.TIME_LIMIT).
const MOI = MathOptInterface

MathOptInterface

## This is the Julia Set Coverage function

In [26]:
"""
    SetCoverageProblem(CSVfile, maxCoverage::Int)

Select the smallest set of facilities such that every origin has at least one selected
facility whose OD-matrix entry is `<= maxCoverage`.

# Arguments
- `CSVfile`: CSV source handed to `CSV.File`. The first column holds the origin
  identifiers; every other column is one candidate facility, its header being the
  facility identifier and its entries the origin-to-facility values that are compared
  against `maxCoverage`.
- `maxCoverage::Int`: largest OD-matrix value at which a facility still covers an origin.

# Returns
An array with the column headers (as `String`s) of the selected facilities.

# Throws
- `ErrorException` if an origin has no facility within `maxCoverage`, or if the solver
  finishes with neither an optimal solution nor a feasible solution at the time limit.
"""
function SetCoverageProblem(CSVfile, maxCoverage::Int)

    println("maxCoverage value")
    println(maxCoverage)

    # materialize a csv file as a DataFrame
    df = CSV.File(CSVfile) |> DataFrame!

    # Facility identifiers are the column headers; column 1 holds the origin identifiers.
    column_headers = [String(name) for name in names(df)[2:end]]
    numFacilities = length(column_headers)

    # origins as array
    origins = df[:, 1]

    println("origins")
    println(origins)

    println("facilities")
    println(column_headers)

    # Cbc is given a 68400-second limit, so the solve may stop with a TIME_LIMIT status.
    m = Model(with_optimizer(Cbc.Optimizer, threads = 2, seconds = 68400))

    # x[j] = 1 when the facility in OD column j + 1 is selected
    @variable(m, x[1:numFacilities], binary=true)

    # Objective: select as few facilities as possible
    @objective(m, Min, sum(x[j] for j in 1:numFacilities))

    # Every origin needs at least one selected facility within maxCoverage.
    for r in 1:size(df, 1)
        eligibleFacilities = [j for j in 1:numFacilities if df[r, j + 1] <= maxCoverage]
        if isempty(eligibleFacilities)
            # Without this check the model would receive the constraint 0 >= 1 and the
            # failure would only surface as a generic solver error.
            error(
                "Origin $(origins[r]) has no facility within maxCoverage = " *
                "$(maxCoverage); the set-coverage problem is infeasible.",
            )
        end
        @constraint(m, sum(x[j] for j in eligibleFacilities) >= 1)
    end

    JuMP.optimize!(m)

    # Check the outcome before querying any result: values are only meaningful for an
    # optimal solve or a time-limited solve that still found a feasible solution.
    status = termination_status(m)
    if !(status == MOI.OPTIMAL || (status == MOI.TIME_LIMIT && has_values(m)))
        error("The model was not solved correctly.")
    end

    println("Objective value is: ", JuMP.objective_value(m))

    result_array = value.(x)

    println("print array values")
    println(result_array)
    println("print array length")
    println(length(result_array))

    # Solver values for binary variables are floats and need not be exactly 0 or 1,
    # so threshold at 0.5 instead of testing == 1.
    selected_facilities = Any[
        column_headers[j] for j in eachindex(result_array) if result_array[j] > 0.5
    ]

    println("print selected_facilities")
    println(selected_facilities)

    return selected_facilities

end

SetCoverageProblem (generic function with 1 method)

### The `SetCoverageProblem` function takes the OD matrix as a CSV file for its first argument and the maximum coverage value for its second argument.

In [28]:
# Solve the full set-coverage model on the saved OD matrix with a maximum coverage of 1200
# (compared directly against the OD matrix entries); the result is the list of selected
# facility column headers.
selected_facilities = SetCoverageProblem("../../../../lima_optimization_output/saved_OD.csv", 1200)

maxCoverage value
1200
origins
[6147, 2052, 3, 6154, 6162, 4115, 6165, 21, 4125, 32, 4130, 4132, 6181, 4134, 2087, 4139, 6190, 4144, 6193, 4146, 2099, 2100, 2096, 4151, 2103, 6204, 4157, 6207, 2112, 6214, 2119, 4167, 4170, 2125, 6222, 4176, 6224, 82, 84, 4180, 2135, 4183, 6233, 6234, 99, 6245, 2150, 6247, 6248, 4198, 106, 4204, 2157, 2160, 114, 2167, 2169, 4219, 124, 2175, 6271, 4223, 130, 6278, 6279, 4233, 4234, 6283, 6286, 6292, 6293, 150, 149, 4244, 2197, 152, 2202, 4252, 2196, 161, 163, 6307, 6311, 170, 173, 4269, 6318, 175, 2225, 174, 2231, 6332, 6334, 4302, 6355, 6357, 2263, 4312, 6361, 6363, 222, 224, 6369, 2278, 2280, 6377, 4329, 4340, 4341, 4342, 6393, 4348, 4354, 4356, 2319, 4373, 2328, 4378, 2334, 2335, 4388, 6437, 294, 295, 296, 297, 6440, 2343, 298, 4394, 2354, 2356, 4404, 4410, 321, 4419, 6478, 335, 6485, 6486, 2389, 2395, 6493, 2397, 351, 352, 6495, 6498, 4455, 4456, 2410, 2413, 368, 6513, 370, 6514, 4468, 2424, 380, 6528, 6529, 384, 4482, 386, 389, 6533, 2433, 6536, 244

3-element Array{Any,1}:
 "2048"
 "2959"
 "4919"

In [29]:
# Display the facility identifiers returned by SetCoverageProblem.
selected_facilities

3-element Array{Any,1}:
 "2048"
 "2959"
 "4919"

## This is the Julia Partial Set Coverage function

In [61]:
"""
    PartialSetCoverageProblem(CSVfile, maxCoverage::Int, popCoverage, origins_pop_dict)

Select the smallest set of facilities such that the origins having at least one selected
facility within `maxCoverage` together hold at least `popCoverage` times the total
population.

# Arguments
- `CSVfile`: CSV source handed to `CSV.File`. The first column holds the origin
  identifiers; every other column is one candidate facility, its header being the
  facility identifier and its entries the origin-to-facility values that are compared
  against `maxCoverage`.
- `maxCoverage::Int`: largest OD-matrix value at which a facility still covers an origin.
- `popCoverage`: multiplier applied to the total population to obtain the population
  that must be covered (e.g. `0.8` for 80%).
- `origins_pop_dict`: mapping from origin identifier to population. It is indexed with
  every origin of the OD matrix, and the total population is the sum over all of its
  values.

# Returns
An array with the column headers (as `String`s) of the selected facilities.

# Throws
- `ErrorException` if the solver finishes with neither an optimal solution nor a
  feasible solution at the time limit.
"""
function PartialSetCoverageProblem(CSVfile, maxCoverage::Int, popCoverage, origins_pop_dict)

    println("maxCoverage value")
    println(maxCoverage)

    # materialize a csv file as a DataFrame
    df = CSV.File(CSVfile) |> DataFrame!

    # Facility identifiers are the column headers; column 1 holds the origin identifiers.
    column_headers = [String(name) for name in names(df)[2:end]]
    numFacilities = length(column_headers)

    # origins as array
    origins = df[:, 1]

    println("facilities")
    println(column_headers)

    # Cbc is given a 68400-second limit, so the solve may stop with a TIME_LIMIT status.
    m = Model(with_optimizer(Cbc.Optimizer, threads = 2, seconds = 68400))

    # x[j] = 1 when the facility in OD column j + 1 is selected
    @variable(m, x[1:numFacilities], binary=true)

    # z[i] = 1 when origin i is counted as covered
    @variable(m, z[origins], binary=true)

    # Objective: select as few facilities as possible
    @objective(m, Min, sum(x[j] for j in 1:numFacilities))

    # An origin may only count as covered (z = 1) when at least one selected facility
    # lies within maxCoverage; an origin with no facility in range is forced to z = 0.
    for r in 1:size(df, 1)
        eligibleFacilities = [j for j in 1:numFacilities if df[r, j + 1] <= maxCoverage]
        @constraint(m, sum(x[j] for j in eligibleFacilities) - z[origins[r]] >= 0)
    end

    # Total population over every entry of origins_pop_dict
    origins_pop_dict_sum = 0
    for population in values(origins_pop_dict)
        origins_pop_dict_sum = origins_pop_dict_sum + population
    end
    println("print origins_pop_dict_sum")
    println(origins_pop_dict_sum)

    min_coverage = origins_pop_dict_sum * popCoverage

    println("print min_coverage")
    println(min_coverage)

    # The covered origins must hold at least the required population.
    @constraint(m, sum(z[i] * origins_pop_dict[i] for i in origins) >= min_coverage)

    JuMP.optimize!(m)

    # Check the outcome before querying any result: values are only meaningful for an
    # optimal solve or a time-limited solve that still found a feasible solution.
    status = termination_status(m)
    if !(status == MOI.OPTIMAL || (status == MOI.TIME_LIMIT && has_values(m)))
        error("The model was not solved correctly.")
    end

    println("Objective value is: ", JuMP.objective_value(m))

    result_array = value.(x)

    println("print array values")
    println(result_array)
    println("print array length")
    println(length(result_array))

    # Solver values for binary variables are floats and need not be exactly 0 or 1,
    # so threshold at 0.5 instead of testing == 1.
    selected_facilities = Any[
        column_headers[j] for j in eachindex(result_array) if result_array[j] > 0.5
    ]

    println("print selected_facilities")
    println(selected_facilities)

    return selected_facilities

end

PartialSetCoverageProblem (generic function with 1 method)

### Inputs are the OD matrix (CSV file), a pre-specified maximum distance coverage, a population coverage fraction (e.g. 0.8), and a dictionary mapping each origin to its population

### import an origins_pop_series from csv

In [62]:
# Materialize the per-origin population CSV as a DataFrame. The displayed result below
# shows two columns: NN (Int64) and pop (Float64).
# NOTE(review): NN is presumably the origin's network node id, matching the first
# column of the OD matrix — confirm against the file that produced this CSV.
origins_pop_series = CSV.File("../../../../lima_optimization_output/origins_w_demands_series_no_dupl.csv") |> DataFrame!

Unnamed: 0_level_0,NN,pop
Unnamed: 0_level_1,Int64,Float64
1,3,1458.0
2,21,2232.0
3,32,2041.0
4,82,1508.0
5,84,1610.0
6,99,1295.0
7,106,1216.0
8,114,824.0
9,124,440.0
10,130,1104.0


In [63]:
# Build a lookup from the first column of origins_pop_series to its second column,
# walking the table one row at a time.
origins_pop_dict = Dict()
row_count = size(origins_pop_series, 1)
for row in 1:row_count
    origin_key = origins_pop_series[row, 1]
    origins_pop_dict[origin_key] = origins_pop_series[row, 2]
end

In [64]:
#origins_pop_dict

In [66]:
# Solve the partial set-coverage model: maximum coverage of 900 (compared directly against
# the OD matrix entries) and a required covered population of 0.8 times the total in
# origins_pop_dict.
selected_facilities = PartialSetCoverageProblem("../../../../lima_optimization_output/saved_OD.csv", 900, .8, origins_pop_dict)

maxCoverage value
900
origins
[6147, 2052, 3, 6154, 6162, 4115, 6165, 21, 4125, 32, 4130, 4132, 6181, 4134, 2087, 4139, 6190, 4144, 6193, 4146, 2099, 2100, 2096, 4151, 2103, 6204, 4157, 6207, 2112, 6214, 2119, 4167, 4170, 2125, 6222, 4176, 6224, 82, 84, 4180, 2135, 4183, 6233, 6234, 99, 6245, 2150, 6247, 6248, 4198, 106, 4204, 2157, 2160, 114, 2167, 2169, 4219, 124, 2175, 6271, 4223, 130, 6278, 6279, 4233, 4234, 6283, 6286, 6292, 6293, 150, 149, 4244, 2197, 152, 2202, 4252, 2196, 161, 163, 6307, 6311, 170, 173, 4269, 6318, 175, 2225, 174, 2231, 6332, 6334, 4302, 6355, 6357, 2263, 4312, 6361, 6363, 222, 224, 6369, 2278, 2280, 6377, 4329, 4340, 4341, 4342, 6393, 4348, 4354, 4356, 2319, 4373, 2328, 4378, 2334, 2335, 4388, 6437, 294, 295, 296, 297, 6440, 2343, 298, 4394, 2354, 2356, 4404, 4410, 321, 4419, 6478, 335, 6485, 6486, 2389, 2395, 6493, 2397, 351, 352, 6495, 6498, 4455, 4456, 2410, 2413, 368, 6513, 370, 6514, 4468, 2424, 380, 6528, 6529, 384, 4482, 386, 389, 6533, 2433, 6536, 2444

3-element Array{Any,1}:
 "2048"
 "3914"
 "3409"

In [67]:
#write-out selected_facilities
#writedlm("../../../../lima_optimization_output/selected_facilities_file_from_julia",selected_facilities)