## G-CARE Benchmark

In [1]:
using Graphs: DiGraph
using StatsPlots
include("Source/datasets.jl")
include("Source/QuasiStableCardinalityEstimator.jl")


# Relative paths to the data files of the four benchmark datasets.
aids_data_file_path = "dataset/aids/aids.txt"
human_data_file_path = "dataset/human/human.txt"
lubm80_data_file_path = "dataset/lubm80/lubm80.txt"
yago_data_file_path = "dataset/yago/yago.txt"

# Only lubm80 is loaded right now; uncomment a line to load that dataset as well.
#aids_data = load_dataset(aids_data_file_path)
#human_data = load_dataset(human_data_file_path)
lubm80_data = load_dataset(lubm80_data_file_path)
#yago_data = load_dataset(yago_data_file_path)
# Make `nothing` the cell's value so the loaded dataset is not echoed as cell output.
nothing

In [2]:
# Datasets to benchmark and their names: `dataset_names[i]` is the name of `datasets[i]`,
# so keep the two lists in the same order. The commented-out pair is the full
# four-dataset configuration; it needs the matching `load_dataset` calls enabled first.
#datasets = [aids_data, human_data, lubm80_data, yago_data]
#dataset_names = ["aids", "human", "lubm80", "yago"]
datasets = [lubm80_data]
dataset_names = ["lubm80"]

1-element Vector{String}:
 "lubm80"

In [3]:
# Build a color summary for every entry of `datasets`. The three result vectors are
# filled in the same order as `datasets`, so entry i of each belongs to `datasets[i]`.
build_time = []        # elapsed build time reported by `@timed`, in seconds
summary_size = []      # value returned by `get_color_summary_size` for each summary
color_summaries = []   # the summaries themselves, reused by the query cells
for data in datasets
    # NOTE(review): 8 is passed as the second positional argument; its meaning is
    # defined in QuasiStableCardinalityEstimator.jl -- confirm there.
    timing = @timed generate_color_summary(data, 8, verbose=true)
    color_summary = timing.value
    push!(build_time, timing.time)
    push!(summary_size, get_color_summary_size(color_summary))
    push!(color_summaries, color_summary)
end

Started coloring
Finished coloring


Started bloom filters


Finished bloom filters
Started cardinality counts


Finished cardinality counts
Started tracking statistics


Finished tracking statistics


In [6]:
# Run the lubm80 queries: for each one, time the cardinality bounds and the exact count,
# check the exact count against the stored true cardinality, and record the results.
# Commented-out entries are queries that are currently disabled.
lubm80_query_paths = [
    "lubm80/lubm80_Q2.txt",
    # "lubm80/lubm80_Q4.txt",
    # "lubm80/lubm80_Q7.txt",
    # "lubm80/lubm80_Q8.txt",
    # "lubm80/lubm80_Q9.txt",
    # "lubm80/lubm80_Q12.txt",
]

# Find the dataset by name instead of by a fixed position: the position of lubm80 in
# `datasets` (and in the per-dataset result vectors) changes with the set of enabled
# datasets, and a stale index would silently evaluate against another dataset.
lubm80_idx = findfirst(==("lubm80"), dataset_names)
if lubm80_idx === nothing
    error("\"lubm80\" is not in dataset_names; load it before running this cell")
end

# Per-query results, in the order the queries were run.
exactSizes = []
bounds = []
relativeErrors = []
println("Summary Size: ", summary_size[lubm80_idx])
println("Summary Build Time: ", build_time[lubm80_idx])
for query_path in lubm80_query_paths
    println("Query: ", query_path)
    # `load_query` returns the query id at index 1 and the query at index 2;
    # only the query is needed here.
    query = load_query("queryset/" * query_path)[2]
    bound_results = @timed get_cardinality_bounds(query, color_summaries[lubm80_idx])
    exact_size_results = @timed get_exact_size(query, datasets[lubm80_idx])
    gcare_size = load_true_cardinality("TrueCardinalities/" * query_path)
    println("Bound Time: ", bound_results.time)
    println("Exact Size Time: ", exact_size_results.time)
    println("Exact Size: ", exact_size_results.value)
    println("GCare Exact Size: ", gcare_size)
    # Stop at the first query whose computed size disagrees with the stored true
    # cardinality; nothing is recorded for that query.
    if gcare_size != exact_size_results.value
        println("Bad Exact Size Estimate!!")
        break
    end
    # Ratio of each bound to the true size (the two sizes are equal at this point).
    relative_error = bound_results.value ./ gcare_size
    println("Bound [Lower, Avg, Upper]: ", bound_results.value)
    println("Relative Error [Lower, Avg, Upper]: ", relative_error)
    push!(exactSizes, gcare_size)
    push!(bounds, bound_results.value)
    push!(relativeErrors, relative_error)
end

Summary Size: 7209
Summary Build Time: 440.3114781


Query: lubm80/lubm80_Q2.txt
PARTIAL PATHS: [([4, 4, 2], [0.0, 0.0, 276232.0]), ([4, 4, 1], [0.0, 0.0, 155782.0])]
NUMBER OF PARTIAL PATHS: 2
NUMBER OF ZERO AVG: 2


Bound Time: 0.0438784
Exact Size Time: 3.5055465
Exact Size: 212
GCare Exact Size: 212
Bound [Lower, Avg, Upper]: [0.0, 0.0, 432014.0]
Relative Error [Lower, Avg, Upper]: [0.0, 0.0, 2037.801886792453]


In [None]:
# Run the human queries: for each one, time the cardinality bounds and the exact count,
# check the exact count against the stored true cardinality, and record the results.
human_query_paths = [
    "human/Chain_3/uf_Q_1_1.txt",
    "human/Chain_3/uf_Q_1_2.txt",
    "human/Chain_3/uf_Q_1_3.txt",
    "human/Star_3/uf_Q_0_1.txt",
    "human/Star_3/uf_Q_0_2.txt",
    "human/Star_3/uf_Q_1_1.txt",
    "human/Star_3/uf_Q_1_2.txt",
    "human/Star_3/uf_Q_1_3.txt",
    "human/Star_3/uf_Q_1_4.txt",
    "human/Star_3/uf_Q_1_5.txt",
    "human/Star_3/uf_Q_1_6.txt",
    "human/Star_3/uf_Q_2_1.txt",
    "human/Star_3/uf_Q_2_2.txt",
    "human/Star_3/uf_Q_2_3.txt",
    "human/Graph_3/uf_Q_3_1.txt",
    "human/Graph_3/uf_Q_3_2.txt",
    "human/Graph_3/uf_Q_3_3.txt",
    "human/Graph_3/uf_Q_3_4.txt",
    "human/Graph_3/uf_Q_3_5.txt",
    "human/Graph_3/uf_Q_3_6.txt",
]

# Find the dataset by name instead of by a fixed position: the position of human in
# `datasets` (and in the per-dataset result vectors) changes with the set of enabled
# datasets, and a fixed index is out of bounds when fewer datasets are loaded.
human_idx = findfirst(==("human"), dataset_names)
if human_idx === nothing
    error("\"human\" is not in dataset_names; load it before running this cell")
end

# Per-query results, in the order the queries were run.
exactSizes = []
bounds = []
relativeErrors = []
println("Summary Size: ", summary_size[human_idx])
println("Summary Build Time: ", build_time[human_idx])
for query_path in human_query_paths
    println("Query: ", query_path)
    # `load_query` returns the query id at index 1 and the query at index 2;
    # only the query is needed here.
    query = load_query("queryset/" * query_path)[2]
    bound_results = @timed get_cardinality_bounds(query, color_summaries[human_idx])
    exact_size_results = @timed get_exact_size(query, datasets[human_idx])
    gcare_size = load_true_cardinality("TrueCardinalities/" * query_path)
    println("Bound Time: ", bound_results.time)
    println("Exact Size Time: ", exact_size_results.time)
    println("Exact Size: ", exact_size_results.value)
    println("GCare Exact Size: ", gcare_size)
    # Stop at the first query whose computed size disagrees with the stored true
    # cardinality; nothing is recorded for that query.
    if gcare_size != exact_size_results.value
        println("Bad Exact Size Estimate!!")
        break
    end
    # Ratio of each bound to the true size (the two sizes are equal at this point).
    relative_error = bound_results.value ./ gcare_size
    println("Bound [Lower, Avg, Upper]: ", bound_results.value)
    println("Relative Error [Lower, Avg, Upper]: ", relative_error)
    push!(exactSizes, gcare_size)
    push!(bounds, bound_results.value)
    push!(relativeErrors, relative_error)
end

In [None]:
# Run the yago queries: for each one, time the cardinality bounds and the exact count,
# check the exact count against the stored true cardinality, and record the results.
yago_query_paths = [
    "yago/Chain_3/f_Q_1_1.txt",
    "yago/Chain_3/f_Q_1_2.txt",
    "yago/Chain_3/f_Q_1_3.txt",
    "yago/Star_3/f_Q_0_1.txt",
    "yago/Star_3/f_Q_0_2.txt",
    "yago/Star_3/f_Q_1_1.txt",
    "yago/Star_3/f_Q_1_2.txt",
    "yago/Star_3/f_Q_1_3.txt",
    "yago/Star_3/f_Q_1_4.txt",
    "yago/Star_3/f_Q_1_5.txt",
    "yago/Star_3/f_Q_1_6.txt",
    "yago/Star_3/f_Q_2_1.txt",
    "yago/Star_3/f_Q_2_2.txt",
    "yago/Star_3/f_Q_2_3.txt",
    "yago/Graph_3/f_Q_3_1.txt",
    "yago/Graph_3/f_Q_3_2.txt",
    "yago/Graph_3/f_Q_3_3.txt",
    "yago/Graph_3/f_Q_3_7.txt",
    "yago/Graph_3/f_Q_3_8.txt",
    "yago/Graph_3/f_Q_3_10.txt",
]

# Find the dataset by name instead of by a fixed position: the position of yago in
# `datasets` (and in the per-dataset result vectors) changes with the set of enabled
# datasets, and a fixed index would evaluate the yago queries against whichever
# dataset happens to sit in that slot.
yago_idx = findfirst(==("yago"), dataset_names)
if yago_idx === nothing
    error("\"yago\" is not in dataset_names; load it before running this cell")
end

# Per-query results, in the order the queries were run.
exactSizes = []
bounds = []
relativeErrors = []
println("Summary Size: ", summary_size[yago_idx])
println("Summary Build Time: ", build_time[yago_idx])
for query_path in yago_query_paths
    println("Query: ", query_path)
    # `load_query` returns the query id at index 1 and the query at index 2;
    # only the query is needed here.
    query = load_query("queryset/" * query_path)[2]
    bound_results = @timed get_cardinality_bounds(query, color_summaries[yago_idx])
    exact_size_results = @timed get_exact_size(query, datasets[yago_idx])
    gcare_size = load_true_cardinality("TrueCardinalities/" * query_path)
    println("Bound Time: ", bound_results.time)
    println("Exact Size Time: ", exact_size_results.time)
    println("Exact Size: ", exact_size_results.value)
    println("GCare Exact Size: ", gcare_size)
    # Stop at the first query whose computed size disagrees with the stored true
    # cardinality; nothing is recorded for that query.
    if gcare_size != exact_size_results.value
        println("Bad Exact Size Estimate!!")
        break
    end
    # Ratio of each bound to the true size (the two sizes are equal at this point).
    relative_error = bound_results.value ./ gcare_size
    println("Bound [Lower, Avg, Upper]: ", bound_results.value)
    println("Relative Error [Lower, Avg, Upper]: ", relative_error)
    push!(exactSizes, gcare_size)
    push!(bounds, bound_results.value)
    push!(relativeErrors, relative_error)
end

In [None]:
# Run the aids queries: for each one, time the cardinality bounds and the exact count,
# check the exact count against the stored true cardinality, and record the results
# together with both timings.
aids_query_paths = [
    "aids/Chain_3/uf_Q_5_1.txt",
    "aids/Chain_3/uf_Q_5_2.txt",
    "aids/Chain_3/uf_Q_5_3.txt",
    "aids/Tree_3/uf_Q_2_5.txt",
    "aids/Tree_3/uf_Q_4_4.txt",
    "aids/Tree_6/uf_Q_4_4.txt",
    "aids/Tree_6/uf_Q_5_6.txt",
    "aids/Star_6/uf_Q_2_3.txt",
    "aids/Star_6/uf_Q_1_1.txt",
    "aids/Cycle_3/uf_Q_2_14.txt",
    "aids/Cycle_3/uf_Q_3_14.txt",
    "aids/Flower_6/uf_Q_3_6.txt",
    "aids/Flower_6/uf_Q_5_3.txt",
    "aids/Flower_9/uf_Q_3_3.txt",
]

# Find the dataset by name instead of by a fixed position: the position of aids in
# `datasets` (and in the per-dataset result vectors) changes with the set of enabled
# datasets, and a fixed index would evaluate the aids queries against whichever
# dataset happens to sit in that slot.
aids_idx = findfirst(==("aids"), dataset_names)
if aids_idx === nothing
    error("\"aids\" is not in dataset_names; load it before running this cell")
end

# Per-query results, in the order the queries were run.
exactSizes = []
bounds = []
relativeErrors = []
exactTimes = []
boundTimes = []
for query_path in aids_query_paths
    println("Query: ", query_path)
    # `load_query` returns the query id at index 1 and the query at index 2;
    # only the query is needed here.
    query = load_query("queryset/" * query_path)[2]
    bound_results = @timed get_cardinality_bounds(query, color_summaries[aids_idx])
    exact_size_results = @timed get_exact_size(query, datasets[aids_idx])
    gcare_size = load_true_cardinality("TrueCardinalities/" * query_path)
    println("Bound Time: ", bound_results.time)
    println("Exact Size Time: ", exact_size_results.time)
    println("Exact Size: ", exact_size_results.value)
    println("GCare Exact Size: ", gcare_size)
    # Stop at the first query whose computed size disagrees with the stored true
    # cardinality; nothing is recorded for that query.
    if gcare_size != exact_size_results.value
        println("Bad Exact Size Estimate!!")
        break
    end
    # Ratio of each bound to the true size.
    relative_error = bound_results.value ./ gcare_size
    println("Bound [Lower, Avg, Upper]: ", bound_results.value)
    println("Relative Error [Lower, Avg, Upper]: ", relative_error)
    push!(exactSizes, gcare_size)
    push!(bounds, bound_results.value)
    push!(relativeErrors, relative_error)
    push!(exactTimes, exact_size_results.time)
    push!(boundTimes, bound_results.time)
end

In [6]:
# Display the relative errors recorded by whichever query cell ran most recently
# (every query cell resets `relativeErrors` before filling it).
relativeErrors

6-element Vector{Any}:
 [0.0, 0.0, 2037.801886792453]
 [0.0, 0.018128757995797183, 2.430169411764706e7]
 [0.0, 8578.183718779555, 317560.89552238805]
 [0.0, 0.007440463481413045, 587.3258023106547]
 [0.0, 0.0, 421.54288588149234]
 [0.0, 59.25191821732139, 9673.333333333334]

In [7]:
# Display the true cardinalities recorded by whichever query cell ran most recently
# (every query cell resets `exactSizes` before filling it).
exactSizes

6-element Vector{Any}:
   212
    34
    67
  7790
 21872
    15