## G-CARE Benchmark

In [2]:
using Graphs: DiGraph
using StatsPlots
include("Source/datasets.jl")
include("Source/QuasiStableCardinalityEstimator.jl")

get_exact_size (generic function with 1 method)

In [18]:
aids_data_file_path = "dataset/aids/aids.txt"
human_data_file_path = "dataset/human/human.txt"
lubm80_data_file_path = "dataset/lubm80/lubm80.txt"
yago_data_file_path = "dataset/yago/yago.txt"

aids_data = load_dataset(aids_data_file_path)
#human_data = load_dataset(human_data_file_path)
#lubm80_data = load_dataset(lubm80_data_file_path)
#yago_data = load_dataset(yago_data_file_path)
nothing

In [3]:
#datasets = [aids_data, human_data, lubm80_data, yago_data]
#dataset_names = ["aids", "human", "lubm80", "yago"]
datasets = [aids_data]
dataset_names = ["aids"]

1-element Vector{String}:
 "aids"

In [4]:
build_time = []
summary_size = []
color_summaries = []
for dataset in datasets
    results = @timed generate_color_summary(dataset, 64, verbose=true)
    push!(build_time, results[2])
    push!(summary_size,  get_color_summary_size(results[1]))
    push!(color_summaries, results[1])
end

Started coloring


Finished coloring


Started bloom filters
Finished bloom filters
Started cardinality counts


Finished cardinality counts
Started tracking statistics


Finished tracking statistics


In [5]:
lubm80_query_paths = ["lubm80/lubm80_Q2.txt",
"lubm80/lubm80_Q4.txt",
"lubm80/lubm80_Q7.txt",
"lubm80/lubm80_Q8.txt",
"lubm80/lubm80_Q9.txt",
"lubm80/lubm80_Q12.txt"
                    ]
exactSizes = []
bounds = []
relativeErrors = []
println("Summary Size: ", summary_size[1])
println("Summary Build Time: ", build_time[1])
for query_path in lubm80_query_paths
    println("Query: ", query_path)
    id_and_query = load_query("queryset/" * query_path)
    id = id_and_query[1]
    query = id_and_query[2]
    bound_results = @timed get_cardinality_bounds(query, color_summaries[1])
    exact_size_results = @timed get_exact_size(query, datasets[1])
    gcare_size = load_true_cardinality("TrueCardinalities/" * query_path)
    println("Bound Time: ", bound_results[2])
    println("Exact Size Time: ", exact_size_results[2])
    println("Exact Size: ", exact_size_results[1])
    println("GCare Exact Size: ", gcare_size)
    if gcare_size != exact_size_results[1]
        println("Bad Exact Size Estimate!!")
        break
    end
    println("Bound [Lower, Avg, Upper]: ", bound_results[1])
    println("Relative Error [Lower, Avg, Upper]: ", (bound_results[1])./exact_size_results[1])
    push!(exactSizes, gcare_size)
    push!(bounds, bound_results[1])
    push!(relativeErrors, bound_results[1] ./ gcare_size)
end

Summary Size: 7524


Summary Build Time: 15.2740163


Query: lubm80/lubm80_Q2.txt


Bound Time: 0.4692433
Exact Size Time: 0.2025699
Exact Size: 0
GCare Exact Size: 212
Bad Exact Size Estimate!!


In [None]:
human_query_directories = ["/queryset/human/Chain_3/",
"/queryset/human/Graph_3/",
"/queryset/human/Star_3/",
"/queryset/human/Tree_3/",
                    ]
human_query_paths = [readdir(pwd() * dir, join=true) for dir in human_query_directories]
human_query_paths = [(human_query_paths...)...]
human_exact_sizes = []
human_bounds = []
human_relative_errors = []
println("Summary Size: ", summary_size[1])
println("Summary Build Time: ", build_time[1])
for query_path in human_query_paths
    println("Query: ", query_path)
    id_and_query = load_query(query_path)
    id = id_and_query[1]
    query = id_and_query[2]
    bound_results = @timed get_cardinality_bounds(query, color_summaries[1])
    exact_size_results = @timed get_exact_size(query, datasets[1])
    gcare_size = load_true_cardinality(replace(query_path, "queryset"=>"TrueCardinalities"))
    println("Bound Time: ", bound_results[2])
    println("Exact Size Time: ", exact_size_results[2])
    println("Exact Size: ", exact_size_results[1])
    println("GCare Exact Size: ", gcare_size)
    if gcare_size != exact_size_results[1]
        println("Bad Exact Size Estimate!!")
        break
    end
    println("Bound [Lower, Avg, Upper]: ", bound_results[1])
    println("Relative Error [Lower, Avg, Upper]: ", bound_results[1]./exact_size_results[1])
    bound_results[1][2] = max(1, bound_results[1][2])
    push!(human_exact_sizes, gcare_size)
    push!(human_bounds, bound_results[1])
    push!(human_relative_errors, bound_results[1] ./ gcare_size)
end

In [32]:
using Statistics
println(maximum([log10(x[2]) for x in human_relative_errors]))
println(mean([log10(x[2]) for x in human_relative_errors]))
println(minimum([log10(x[2]) for x in human_relative_errors]))

2.1162262041985125
-0.09453409140278614
-1.5314789170422551


In [6]:
aids_query_directories = ["/queryset/aids/Chain_3/",
"/queryset/aids/Chain_6/",
"/queryset/aids/Chain_9/",
"/queryset/aids/Chain_12/",
"/queryset/aids/Cycle_3/",
"/queryset/aids/Cycle_6/",
"/queryset/aids/Flower_6/",
"/queryset/aids/Flower_9/",
"/queryset/aids/Flower_12/",
"/queryset/aids/Graph_3/",
"/queryset/aids/Graph_6/",
"/queryset/aids/Graph_9/",
"/queryset/aids/Graph_12/",
"/queryset/aids/Petal_6/",
"/queryset/aids/Petal_9/",
"/queryset/aids/Petal_12/",
"/queryset/aids/Star_3/",
"/queryset/aids/Star_6/",
"/queryset/aids/Star_9/",
"/queryset/aids/Tree_3/",
"/queryset/aids/Tree_6/",
"/queryset/aids/Tree_9/",
"/queryset/aids/Tree_12/"]
aids_query_paths = [readdir(pwd() * dir, join=true) for dir in aids_query_directories]
aids_query_paths = [(aids_query_paths...)...]
aids_exact_sizes = []
aids_bounds = []
aids_relative_errors = []
println("Summary Size: ", summary_size[1])
println("Summary Build Time: ", build_time[1])
for query_path in aids_query_paths
    println("Query: ", query_path)
    id_and_query = load_query(query_path)
    id = id_and_query[1]
    query = id_and_query[2]
    bound_results = @timed get_cardinality_bounds(query, color_summaries[2])
    gcare_size = load_true_cardinality(replace(query_path, "queryset"=>"TrueCardinalities"))
    println("Bound Time: ", bound_results[2])
    println("Exact Size: ", gcare_size)
    println("Bound [Lower, Avg, Upper]: ", bound_results[1])
    println("Relative Error [Lower, Avg, Upper]: ", bound_results[1]./gcare_size)
    bound_results[1][2] = max(1, bound_results[1][2])
    push!(aids_exact_sizes, gcare_size)
    push!(aids_bounds, bound_results[1])
    push!(aids_relative_errors, bound_results[1] ./ gcare_size)
end

Query: aids/Chain_3/uf_Q_5_1.txt


Bound Time: 0.2750232
Exact Size Time: 1.4581084
Exact Size: 120348
GCare Exact Size: 120348
Bound [Lower, Avg, Upper]: 

[0.0, 303950.88071850373, 5.0131578e8]
Relative Error [Lower, Avg, Upper]: [0.0, 2.5255997666642047, 4165.551400937282]
Query: aids/Chain_3/uf_Q_5_2.txt


Bound Time: 0.0002393
Exact Size Time: 0.7342707
Exact Size: 120348
GCare Exact Size: 120348
Bound [Lower, Avg, Upper]: [0.0, 303950.8807185036, 3.50206776e8]
Relative Error [Lower, Avg, Upper]: [0.0, 2.525599766664204, 2909.9509422674246]
Query: aids/Chain_3/uf_Q_5_3.txt


Bound Time: 0.0002979
Exact Size Time: 1.2440183
Exact Size: 120348
GCare Exact Size: 120348
Bound [Lower, Avg, Upper]: [0.0, 303950.88071850373, 5.0131578e8]
Relative Error [Lower, Avg, Upper]: [0.0, 2.5255997666642047, 4165.551400937282]
Query: aids/Tree_3/uf_Q_2_5.txt
Bound Time: 0.0001043
Exact Size Time: 0.0330158
Exact Size: 221
GCare Exact Size: 221
Bound [Lower, Avg, Upper]: [0.0, 140.80476832308895, 9.481703e6]
Relative Error [Lower, Avg, Upper]: [0.0, 0.6371256485207645, 42903.6334841629]
Query: aids/Tree_3/uf_Q_4_4.txt


Bound Time: 0.0001207
Exact Size Time: 0.8906588
Exact Size: 17682
GCare Exact Size: 17682
Bound [Lower, Avg, Upper]: [0.0, 290809.7675235742, 2.7710082e8]
Relative Error [Lower, Avg, Upper]: [0.0, 16.446655781222383, 15671.350525958602]
Query: aids/Tree_6/uf_Q_4_4.txt


Bound Time: 0.0003244
Exact Size Time: 0.7339137
Exact Size: 66885
GCare Exact Size: 66885
Bound [Lower, Avg, Upper]: [0.0, 1064.5842271606264, 7.6172137248e10]
Relative Error [Lower, Avg, Upper]: [0.0, 0.015916636423123666, 1.1388523173805785e6]
Query: aids/Tree_6/uf_Q_5_6.txt


Bound Time: 0.000566
Exact Size Time: 0.7863586
Exact Size: 267636
GCare Exact Size: 267636
Bound [Lower, Avg, Upper]: [0.0, 8872.301243576849, 1.96168206645e11]
Relative Error [Lower, Avg, Upper]: [0.0, 0.03315062713378188, 732966.4419024347]
Query: aids/Star_6/uf_Q_2_3.txt
Bound Time: 0.0002815
Exact Size Time: 0.0418286
Exact Size: 625
GCare Exact Size: 625
Bound [Lower, Avg, Upper]: [0.0, 695.192447299271, 3.2706922334e10]
Relative Error [Lower, Avg, Upper]: [0.0, 1.1123079156788336, 

5.23310757344e7]
Query: aids/Star_6/uf_Q_1_1.txt
Bound Time: 0.0003839
Exact Size Time: 0.0451721
Exact Size: 81
GCare Exact Size: 81
Bound [Lower, Avg, Upper]: [0.0, 1559.6062549743472, 4.3035718973e10]
Relative Error [Lower, Avg, Upper]: [0.0

, 19.25439820955984, 5.3130517250617284e8]
Query: aids/Cycle_3/uf_Q_2_14.txt


Bound Time: 0.1396392
Exact Size Time: 0.6339384
Exact Size: 290
GCare Exact Size: 290
Bound [Lower, Avg, Upper]: [0.0, 0.21921286139104404, 1.4455974e7]
Relative Error [Lower, Avg, Upper]: [0.0, 0.000755906418589807, 49848.18620689655]
Query: aids/Cycle_3/uf_Q_3_14.txt

In [30]:
using Statistics
println(maximum([log10(x[2]) for x in aids_relative_errors]))
println(mean([log10(x[2]) for x in aids_relative_errors]))
println(minimum([log10(x[2]) for x in aids_relative_errors]))


Bound Time: 0.002124
Exact Size Time: 2.6296829
Exact Size: 3240
GCare Exact Size: 3240
Bound [Lower, Avg, Upper]: [0.0, 1.8316050111256998, 6.3332473e7]
Relative Error [Lower, Avg, Upper]: [0.0, 0.0005653101886190432, 19547.059567901233]
Query: aids/Flower_6/uf_Q_3_6.txt


In [None]:
lubm80_query_paths = ["lubm80/lubm80_Q2.txt",
# "lubm80/lubm80_Q4.txt",
# "lubm80/lubm80_Q7.txt",
# "lubm80/lubm80_Q8.txt",
# "lubm80/lubm80_Q9.txt",
# "lubm80/lubm80_Q12.txt"
                    ]
exactSizes = []
bounds = []
relativeErrors = []
println("Summary Size: ", summary_size[1])
println("Summary Build Time: ", build_time[1])
for query_path in lubm80_query_paths
    println("Query: ", query_path)
    id_and_query = load_query("queryset/" * query_path)
    id = id_and_query[1]
    query = id_and_query[2]
    bound_results = @timed get_cardinality_bounds(query, color_summaries[1])
    exact_size_results = @timed get_exact_size(query, datasets[1])
    gcare_size = load_true_cardinality("TrueCardinalities/" * query_path)
    println("Bound Time: ", bound_results[2])
    println("Exact Size Time: ", exact_size_results[2])
    println("Exact Size: ", exact_size_results[1])
    println("GCare Exact Size: ", gcare_size)
    if gcare_size != exact_size_results[1]
        println("Bad Exact Size Estimate!!")
        break
    end
    println("Bound [Lower, Avg, Upper]: ", bound_results[1])
    println("Relative Error [Lower, Avg, Upper]: ", (bound_results[1])./exact_size_results[1])
    push!(exactSizes, gcare_size)
    push!(bounds, bound_results[1])
    push!(relativeErrors, bound_results[1] ./ gcare_size)
end

Bound Time: 0.0017519
Exact Size Time: 0.6279784
Exact Size: 2898
GCare Exact Size: 2898
Bound [Lower, Avg, Upper]: [0.0, 1.1351858107871278, 9.1304140465e10]
Relative Error [Lower, Avg, Upper]: [0.0, 0.00039171353029231464, 3.1505914584195998e7]
Query: aids/Flower_6/uf_Q_5_3.txt


Bound Time: 0.0021339
Exact Size Time: 1.6899193
Exact Size: 231756
GCare Exact Size: 231756
Bound [Lower, Avg, Upper]: [0.0, 0.4346527946900542, 4.5954151182e10]
Relative Error [Lower, Avg, Upper]: [0.0, 1.8754759086714224e-6, 198286.7808470978]
Query: aids/Flower_9/uf_Q_3_3.txt


Bound Time: 0.0020644
Exact Size Time: 5.4279249
Exact Size: 4836
GCare Exact Size: 4836
Bound [Lower, Avg, Upper]: [0.0, 10.343756710139871, 1.959305959423456e15]
Relative Error [Lower, Avg, Upper]: [0.0, 0.0021389075083002216, 4.051501156789611e11]


In [None]:
relativeErrors

In [None]:
exactSizes