## G-CARE Benchmark

In [34]:
using Graphs: DiGraph
using StatsPlots
include("Source/datasets.jl")
include("Source/QuasiStableCardinalityEstimator.jl")


# G-CARE data graphs, one group per dataset: the file path, followed by the
# load call where that dataset is enabled. Paths are relative to the
# notebook's working directory.

aids_data_file_path = "dataset/aids/aids.txt"
aids_data = load_dataset(aids_data_file_path)

human_data_file_path = "dataset/human/human.txt"
human_data = load_dataset(human_data_file_path)

# lubm80 and yago are not loaded; only their paths are defined.
lubm80_data_file_path = "dataset/lubm80/lubm80.txt"
#lubm80_data = load_dataset(lubm80_data_file_path)

yago_data_file_path = "dataset/yago/yago.txt"
#yago_data = load_dataset(yago_data_file_path)

# End the cell on `nothing` so the last loaded dataset is not echoed as output.
nothing

In [28]:
# Datasets under benchmark and their display names, kept index-aligned:
# position 1 is aids and position 2 is human. Later cells rely on this order
# when they index `datasets` and `color_summaries` by position.
#
# Full G-CARE set, to be re-enabled once lubm80_data and yago_data are loaded
# (their load_dataset calls are currently commented out):
#datasets = [aids_data, human_data, lubm80_data, yago_data]
#dataset_names = ["aids", "human", "lubm80", "yago"]
datasets = [aids_data, human_data]
dataset_names = ["aids", "human"]

2-element Vector{String}:
 "aids"
 "human"

In [42]:
# Build one color summary per dataset and record how long the build took and
# how large the resulting summary is. All three vectors are index-aligned
# with `datasets`.
build_time = []
summary_size = []
color_summaries = []
for dataset in datasets
    # `@timed` yields the computed value first and the elapsed seconds second.
    # NOTE(review): 256 is forwarded as generate_color_summary's second
    # argument; its meaning is defined in the included source — confirm there.
    color_summary, elapsed_seconds = @timed generate_color_summary(dataset, 256)
    push!(build_time, elapsed_seconds)
    push!(summary_size, get_color_summary_size(color_summary))
    push!(color_summaries, color_summary)
end

In [1]:
# Benchmark the human color summary on a fixed set of G-CARE human queries.
#
# For every query this cell computes the cardinality bounds from the summary,
# computes the exact size on the data graph, and compares the exact size with
# the stored G-CARE true cardinality. Results are accumulated in the global
# vectors `exactSizes`, `bounds`, `relativeErrors`, `exactTimes` and
# `boundTimes` (the same set the aids benchmark cell fills), one entry per
# query that passed the comparison.
human_query_paths = [
    "human/Chain_3/uf_Q_1_1.txt",
    "human/Chain_3/uf_Q_1_2.txt",
    "human/Chain_3/uf_Q_1_3.txt",
    "human/Star_3/uf_Q_0_1.txt",
    "human/Star_3/uf_Q_0_2.txt",
    "human/Star_3/uf_Q_1_1.txt",
    "human/Star_3/uf_Q_1_2.txt",
    "human/Star_3/uf_Q_1_3.txt",
    "human/Star_3/uf_Q_1_4.txt",
    "human/Star_3/uf_Q_1_5.txt",
    "human/Star_3/uf_Q_1_6.txt",
    "human/Star_3/uf_Q_2_1.txt",
    "human/Star_3/uf_Q_2_2.txt",
    "human/Star_3/uf_Q_2_3.txt",
    "human/Graph_3/uf_Q_3_1.txt",
    "human/Graph_3/uf_Q_3_2.txt",
    "human/Graph_3/uf_Q_3_3.txt",
    "human/Graph_3/uf_Q_3_4.txt",
    "human/Graph_3/uf_Q_3_5.txt",
    "human/Graph_3/uf_Q_3_6.txt",
]

# Position of the human dataset in `datasets`, `color_summaries`,
# `summary_size` and `build_time`.
human_index = 2

# Left untyped: the element types come from helpers defined in the included
# source files.
exactSizes = []
bounds = []
relativeErrors = []
exactTimes = []
boundTimes = []

println("Summary Size: ", summary_size[human_index])
println("Summary Build Time: ", build_time[human_index])
for query_path in human_query_paths
    println("Query: ", query_path)
    # load_query's result carries the query id at index 1 and the query at
    # index 2; only the query is used here.
    query = load_query("queryset/" * query_path)[2]
    # `@timed` yields the computed value first and the elapsed seconds second.
    bound, bound_seconds = @timed get_cardinality_bounds(
        query, color_summaries[human_index]
    )
    exact_size, exact_seconds = @timed get_exact_size(query, datasets[human_index])
    gcare_size = load_true_cardinality("TrueCardinalities/" * query_path)
    println("Bound Time: ", bound_seconds)
    println("Exact Size Time: ", exact_seconds)
    println("Exact Size: ", exact_size)
    println("GCare Exact Size: ", gcare_size)
    # A mismatch means the exact counter disagrees with the G-CARE ground
    # truth; stop here so nothing is recorded for this or any later query.
    if gcare_size != exact_size
        println("Bad Exact Size Estimate!!")
        break
    end
    # Relative error is taken against the G-CARE cardinality for both the
    # printed and the stored value, so the two always agree.
    relative_error = bound ./ gcare_size
    println("Bound [Lower, Avg, Upper]: ", bound)
    println("Relative Error [Lower, Avg, Upper]: ", relative_error)
    push!(exactSizes, gcare_size)
    push!(bounds, bound)
    push!(relativeErrors, relative_error)
    push!(exactTimes, exact_seconds)
    push!(boundTimes, bound_seconds)
end

LoadError: syntax: cannot juxtapose string literal

In [49]:
# Benchmark the aids color summary (position 1 in `datasets` and
# `color_summaries`) on a fixed set of G-CARE aids queries. One entry per
# verified query is appended to `exactSizes`, `bounds`, `relativeErrors`,
# `exactTimes` and `boundTimes`.
aids_query_paths = [
    "aids/Chain_3/uf_Q_5_1.txt",
    "aids/Chain_3/uf_Q_5_2.txt",
    "aids/Chain_3/uf_Q_5_3.txt",
    "aids/Tree_3/uf_Q_2_5.txt",
    "aids/Tree_3/uf_Q_4_4.txt",
    "aids/Tree_6/uf_Q_4_4.txt",
    "aids/Tree_6/uf_Q_5_6.txt",
    "aids/Star_6/uf_Q_2_3.txt",
    "aids/Star_6/uf_Q_1_1.txt",
    "aids/Cycle_3/uf_Q_2_14.txt",
    "aids/Cycle_3/uf_Q_3_14.txt",
    "aids/Flower_6/uf_Q_3_6.txt",
    "aids/Flower_6/uf_Q_5_3.txt",
    "aids/Flower_9/uf_Q_3_3.txt",
]

exactSizes = []
bounds = []
relativeErrors = []
exactTimes = []
boundTimes = []

for query_path in aids_query_paths
    println("Query: ", query_path)
    # load_query's result carries the query id at index 1 and the query at
    # index 2; only the query is used here.
    query = load_query("queryset/" * query_path)[2]
    # `@timed` yields the computed value first and the elapsed seconds second.
    # NOTE(review): try_all_starting_nodes=false is passed through as-is; its
    # effect is defined in the included estimator source — confirm there.
    estimate, estimate_seconds = @timed get_cardinality_bounds(
        query, color_summaries[1]; try_all_starting_nodes=false
    )
    exact_size, exact_seconds = @timed get_exact_size(query, datasets[1])
    true_size = load_true_cardinality("TrueCardinalities/" * query_path)
    println("Bound Time: ", estimate_seconds)
    println("Exact Size Time: ", exact_seconds)
    println("Exact Size: ", exact_size)
    println("GCare Exact Size: ", true_size)
    # Stop at the first query whose exact size disagrees with the stored
    # G-CARE cardinality; nothing is recorded for it or for later queries.
    if true_size != exact_size
        println("Bad Exact Size Estimate!!")
        break
    end
    relative_error = estimate ./ true_size
    println("Bound [Lower, Avg, Upper]: ", estimate)
    println("Relative Error [Lower, Avg, Upper]: ", relative_error)
    push!(exactSizes, true_size)
    push!(bounds, estimate)
    push!(relativeErrors, relative_error)
    push!(exactTimes, exact_seconds)
    push!(boundTimes, estimate_seconds)
end

Query: aids/Chain_3/uf_Q_5_1.txt
Bound Time: 0.019921004
Exact Size Time: 2.911896261
Exact Size: 120348
GCare Exact Size: 120348
Bound [Lower, Avg, Upper]: [0.0, 294313.8547063301, 8.708159728e9]
Relative Error [Lower, Avg, Upper]: [0.0, 2.4455234379161275, 72358.15907202446]
Query: aids/Chain_3/uf_Q_5_2.txt
Bound Time: 0.025669944
Exact Size Time: 1.01649308
Exact Size: 120348
GCare Exact Size: 120348
Bound [Lower, Avg, Upper]: [0.0, 294313.8547063292, 7.07787046e9]
Relative Error [Lower, Avg, Upper]: [0.0, 2.44552343791612, 58811.699903612855]
Query: aids/Chain_3/uf_Q_5_3.txt
Bound Time: 0.019291316
Exact Size Time: 2.612219316
Exact Size: 120348
GCare Exact Size: 120348
Bound [Lower, Avg, Upper]: [0.0, 294313.8547063301, 8.708159728e9]
Relative Error [Lower, Avg, Upper]: [0.0, 2.4455234379161275, 72358.15907202446]
Query: aids/Tree_3/uf_Q_2_5.txt
Bound Time: 0.011759344
Exact Size Time: 0.060378201
Exact Size: 221
GCare Exact Size: 221
Bound [Lower, Avg, Upper]: [0.0, 141.493528468

In [50]:
# Show the relative errors collected by the most recently run benchmark loop:
# one [lower, avg, upper] entry per query, each bound divided by the G-CARE
# true cardinality.
relativeErrors

14-element Vector{Any}:
 [0.0, 2.4455234379161275, 72358.15907202446]
 [0.0, 2.44552343791612, 58811.699903612855]
 [0.0, 2.4455234379161275, 72358.15907202446]
 [0.0, 0.6402422102631572, 35998.610859728506]
 [0.0, 14.824686422539688, 341317.97658635903]
 [0.011482395155864543, 0.9611912936855616, 2.541585956014054e6]
 [0.0, 0.12429483728044356, 2.7722027399164164e8]
 [0.0, 0.2911265432098766, 180147.1232]
 [0.0, 21.695283122596635, 6.7916178923666664e10]
 [0.0, 0.00018059634583532025, 273613.46896551724]
 [0.0, 0.00010358370510822811, 143265.28302469137]
 [0.0, 9.600891768533859e-5, 5.263060264876467e9]
 [0.0, 3.50443013970849e-6, 1.3055389384736101e8]
 [0.0, 0.0004243600861797454, 1.633232201774793e14]

In [52]:
# Show the G-CARE true cardinalities collected by the most recently run
# benchmark loop, one per query, in query order.
exactSizes

14-element Vector{Any}:
 120348
 120348
 120348
    221
  17682
  66885
 267636
    625
     81
    290
   3240
   2898
 231756
   4836