## G-CARE Benchmark

In [25]:
using Graphs: DiGraph
include("Source/datasets.jl")
include("Source/QuasiStableCardinalityEstimator.jl")


# Each dataset's path is declared next to the call that loads it.
aids_data_file_path = "dataset/aids/aids.txt"
aids_data = load_dataset(aids_data_file_path)

human_data_file_path = "dataset/human/human.txt"
human_data = load_dataset(human_data_file_path)

# The lubm80 and yago paths stay defined, but loading them is currently disabled.
lubm80_data_file_path = "dataset/lubm80/lubm80.txt"
#lubm80_data = load_dataset(lubm80_data_file_path)

yago_data_file_path = "dataset/yago/yago.txt"
#yago_data = load_dataset(yago_data_file_path)

# End on `nothing` so the cell does not display the last loaded dataset.
nothing

In [2]:
# Full benchmark set; disabled because the lubm80 and yago loads are commented out
# in the dataset-loading cell.
#datasets = [aids_data, human_data, lubm80_data, yago_data]
#dataset_names = ["aids", "human", "lubm80", "yago"]
# `datasets` and `dataset_names` are parallel vectors: later cells read both at the
# same position, so keep their order in sync.
datasets = [aids_data, human_data]
dataset_names = ["aids", "human"]

2-element Vector{String}:
 "aids"
 "human"

In [5]:
# Build one color summary per dataset, recording the build time and the summary size.
# The three vectors are filled in the same order as `datasets`.
build_time = []
summary_size = []
color_summaries = []
for graph in datasets
    # `@timed` yields the expression's value first and the elapsed seconds second.
    # NOTE(review): 32 is passed straight to generate_color_summary; presumably the
    # number of colors -- confirm against its definition.
    built_summary, elapsed = @timed generate_color_summary(graph, 32)
    push!(build_time, elapsed)
    push!(summary_size, get_color_summary_size(built_summary))
    push!(color_summaries, built_summary)
end

In [70]:
# Three hand-written query patterns. Edges are added before node labels, in the
# same order as written out below.
# NOTE(review): label -1 is presumably a wildcard -- confirm against PropertyGraph.

# Triangle: 1 -> 2 -> 3 -> 1, all edges labeled -1; only node 1 has label 10.
triangle_query = PropertyGraph(3)
for edge in ((1, 2), (2, 3), (3, 1))
    add_labeled_edge!(triangle_query, edge, -1)
end
for (node, label) in ((1, 10), (2, -1), (3, -1))
    add_labeled_node!(triangle_query, node, [label])
end


# Chain: 1 -> 2 -> 3 -> 4, all edges labeled 0.
chain_query = PropertyGraph(4)
for edge in ((1, 2), (2, 3), (3, 4))
    add_labeled_edge!(chain_query, edge, 0)
end
for (node, label) in ((1, 25), (2, 10), (3, 5), (4, 1))
    add_labeled_node!(chain_query, node, [label])
end


# Tree: the chain 1 -> 2 -> 3 -> 4 plus a branch 2 -> 5, all edges labeled 0.
tree_query = PropertyGraph(5)
for edge in ((1, 2), (2, 3), (3, 4), (2, 5))
    add_labeled_edge!(tree_query, edge, 0)
end
for (node, label) in ((1, 25), (2, 10), (3, 5), (4, 1))
    add_labeled_node!(tree_query, node, [label])
end
# Kept as the final expression so the cell still displays this call's return value.
add_labeled_node!(tree_query, 5, [12])

1-element Vector{Int64}:
 12

In [71]:
# For every loaded dataset, print the summary statistics and then, for each
# hand-written query, the timing and result of the cardinality bounds next to the
# exact count. The printed text matches the former per-query copies line for line.
for i in eachindex(datasets)
    println("Dataset:", dataset_names[i])
    println("Summary Build Time:", build_time[i])
    println("Summary Size:", summary_size[i])

    # One pass per query replaces three hand-copied stanzas; `label` is the prefix
    # of every printed line for that query.
    for (label, pattern) in (
        ("Triangle", triangle_query),
        ("Chain", chain_query),
        ("Tree", tree_query),
    )
        # Index 1 of a `@timed` result is the value, index 2 the elapsed seconds.
        bound_run = @timed get_cardinality_bounds(pattern, color_summaries[i])
        exact_run = @timed get_exact_size(pattern, datasets[i])
        println(label, " Bound Time: ", bound_run[2])
        println(label, " Exact Size Time: ", exact_run[2])
        println(label, " Exact Size: ", exact_run[1])
        println(label, " Bound [Lower, Avg, Upper]: ", bound_run[1])
        # This is the ratio estimate / exact; it prints NaN or Inf when the exact
        # size is 0.
        println(
            label,
            " Relative Error [Lower, Avg, Upper]: ",
            bound_run[1] ./ exact_run[1],
        )
    end
end

Dataset:aids
Summary Build Time:9.751231551
Summary Size:16767
Triangle Bound Time: 0.017626434
Triangle Exact Size Time: 0.049458826
Triangle Exact Size: 112
Triangle Bound [Lower, Avg, Upper]: [0.0, 0.0027283251589922904, 20960.0]
Triangle Relative Error [Lower, Avg, Upper]: [0.0, 2.4360046062431166e-5, 187.14285714285714]
Chain Bound Time: 0.02876133
Chain Exact Size Time: 0.113230813
Chain Exact Size: 0
Chain Bound [Lower, Avg, Upper]: [0.0, 0.0008854862003792051, 53.0]
Chain Relative Error [Lower, Avg, Upper]: [NaN, Inf, Inf]
Tree Bound Time: 0.00020301
Tree Exact Size Time: 0.045988151
Tree Exact Size: 0
Tree Bound [Lower, Avg, Upper]: [0.0, 9.019542960376027e-7, 16.0]
Tree Relative Error [Lower, Avg, Upper]: [NaN, Inf, Inf]
Dataset:human
Summary Build Time:0.834180826
Summary Size:87180
Triangle Bound Time: 0.027833571
Triangle Exact Size Time: 0.493410651
Triangle Exact Size: 34196
Triangle Bound [Lower, Avg, Upper]: [0.0, 5275.49109031926, 1.4841625e7]
Triangle Relative Error 

In [22]:
# G-CARE queries for the human dataset, as paths relative to both "queryset/" and
# "TrueCardinalities/". Commented-out entries are excluded from the run.
human_query_paths = [
    "human/Chain_3/uf_Q_1_1.txt",
    "human/Chain_3/uf_Q_1_2.txt",
    "human/Chain_3/uf_Q_1_3.txt",
    "human/Star_3/uf_Q_0_1.txt",
    "human/Star_3/uf_Q_0_2.txt",
    "human/Star_3/uf_Q_1_1.txt",
    "human/Star_3/uf_Q_1_2.txt",
    "human/Star_3/uf_Q_1_3.txt",
    #"human/Star_3/uf_Q_1_4.txt", <- This query has structure 'a -> b <- c' which we don't support. Need to support walking backwards along edges.
    "human/Star_3/uf_Q_1_5.txt",
    #"human/Star_3/uf_Q_1_6.txt",
    "human/Star_3/uf_Q_2_1.txt",
    #"human/Star_3/uf_Q_2_2.txt",
    #"human/Star_3/uf_Q_2_3.txt",
    "human/Graph_3/uf_Q_3_1.txt",
    "human/Graph_3/uf_Q_3_2.txt",
    "human/Graph_3/uf_Q_3_3.txt",
    "human/Graph_3/uf_Q_3_4.txt",
    "human/Graph_3/uf_Q_3_5.txt",
    "human/Graph_3/uf_Q_3_6.txt",
]

# Per-query results, appended in query order. Entries are only recorded for queries
# whose computed exact size agrees with the G-CARE ground truth.
exactSizes = []
bounds = []
relativeErrors = []
for query_path in human_query_paths
    println("Query: ", query_path)
    # load_query returns an indexable result; only its second element (the query)
    # is used here.
    query = load_query("queryset/" * query_path)[2]
    # Position 2 in `datasets` / `color_summaries` is the human dataset.
    bound_run = @timed get_cardinality_bounds(query, color_summaries[2])
    exact_run = @timed get_exact_size(query, datasets[2])
    gcare_size = load_true_cardinality("TrueCardinalities/" * query_path)
    println("Bound Time: ", bound_run[2])
    println("Exact Size Time: ", exact_run[2])
    println("Exact Size: ", exact_run[1])
    println("GCare Exact Size: ", gcare_size)
    # A mismatch means the exact counter is wrong, so stop the whole run.
    if gcare_size != exact_run[1]
        println("Bad Exact Size Estimate!!")
        break
    end
    # Past the check the two sizes are equal, so the ratio is computed once and
    # used for both the printed line and the stored result.
    ratio = bound_run[1] ./ gcare_size
    println("Bound [Lower, Avg, Upper]: ", bound_run[1])
    println("Relative Error [Lower, Avg, Upper]: ", ratio)
    push!(exactSizes, gcare_size)
    push!(bounds, bound_run[1])
    push!(relativeErrors, ratio)
end

Query: human/Chain_3/uf_Q_1_1.txt
Bound Time: 0.035563612
Exact Size Time: 0.010463361
Exact Size: 72
GCare Exact Size: 72
Bound [Lower, Avg, Upper]: [0.0, 35.30101198017352, 144966.0]
Relative Error [Lower, Avg, Upper]: [0.0, 0.49029183305796553, 2013.4166666666667]
Query: human/Chain_3/uf_Q_1_2.txt
Bound Time: 0.005932401
Exact Size Time: 2.447280906
Exact Size: 22
GCare Exact Size: 22
Bound [Lower, Avg, Upper]: [0.0, 68.13434707865419, 144966.0]
Relative Error [Lower, Avg, Upper]: [0.0, 3.097015776302463, 6589.363636363636]
Query: human/Chain_3/uf_Q_1_3.txt
Bound Time: 0.005561738
Exact Size Time: 0.045959221
Exact Size: 10
GCare Exact Size: 10
Bound [Lower, Avg, Upper]: [0.0, 280.77661465119127, 92281.0]
Relative Error [Lower, Avg, Upper]: [0.0, 28.077661465119128, 9228.1]
Query: human/Star_3/uf_Q_0_1.txt
Bound Time: 0.000883148
Exact Size Time: 0.000717952
Exact Size: 1
GCare Exact Size: 1
Bound [Lower, Avg, Upper]: [0.0, 32.00598760591464, 114921.0]
Relative Error [Lower, Avg, Up

In [23]:
# Display the per-query [lower, avg, upper] ratios collected by the preceding loop.
relativeErrors

16-element Vector{Any}:
 [0.0, 0.49029183305796553, 2013.4166666666667]
 [0.0, 3.097015776302463, 6589.363636363636]
 [0.0, 28.077661465119128, 9228.1]
 [0.0, 32.00598760591464, 114921.0]
 [0.0, 32.00598760591464, 114921.0]
 [0.0, 1.7577116870984315, 29979.391304347828]
 [0.0, 1.2819541026917034, 4596.84]
 [0.0, 1.2819541026917034, 4596.84]
 [0.0, 1.2819541026917034, 4596.84]
 [0.0, 0.5780844544980178, 4377.942857142857]
 [0.0, 0.07747229334465269, 11.906555671175859]
 [0.0, 0.04264231197888055, 11.636072727272728]
 [0.0, 0.1486367153567444, 10.56713780918728]
 [0.0, 0.04678298876516126, 6.489869585468095]
 [0.0, 0.04491366490703863, 6.989783587828189]
 [0.0, 0.18417492606195768, 2.1988555078683834]

In [26]:
# G-CARE queries for the aids dataset, as paths relative to both "queryset/" and
# "TrueCardinalities/".
aids_query_paths = [
    "aids/Chain_3/uf_Q_5_1.txt",
    "aids/Chain_3/uf_Q_5_2.txt",
    "aids/Chain_3/uf_Q_5_3.txt",
]

# Per-query results, appended in query order. Entries are only recorded for queries
# whose computed exact size agrees with the G-CARE ground truth.
exactSizes = []
bounds = []
relativeErrors = []
for query_path in aids_query_paths
    println("Query: ", query_path)
    # load_query returns an indexable result; only its second element (the query)
    # is used here.
    query = load_query("queryset/" * query_path)[2]
    # Position 1 in `datasets` / `color_summaries` is the aids dataset.
    bound_run = @timed get_cardinality_bounds(query, color_summaries[1])
    exact_run = @timed get_exact_size(query, datasets[1])
    gcare_size = load_true_cardinality("TrueCardinalities/" * query_path)
    println("Bound Time: ", bound_run[2])
    println("Exact Size Time: ", exact_run[2])
    println("Exact Size: ", exact_run[1])
    println("GCare Exact Size: ", gcare_size)
    # A mismatch means the exact counter is wrong, so stop the whole run.
    if gcare_size != exact_run[1]
        println("Bad Exact Size Estimate!!")
        break
    end
    # Past the check the two sizes are equal, so the ratio is computed once and
    # used for both the printed line and the stored result.
    ratio = bound_run[1] ./ gcare_size
    println("Bound [Lower, Avg, Upper]: ", bound_run[1])
    println("Relative Error [Lower, Avg, Upper]: ", ratio)
    push!(exactSizes, gcare_size)
    push!(bounds, bound_run[1])
    push!(relativeErrors, ratio)
end

Query: aids/Chain_3/uf_Q_5_1.txt
Bound Time: 0.038627334
Exact Size Time: 2.021363947
Exact Size: 120348
GCare Exact Size: 120348
Bound [Lower, Avg, Upper]: [0.0, 316627.281236265, 1.489136846e9]
Relative Error [Lower, Avg, Upper]: [0.0, 2.6309309771351828, 12373.590304782796]
Query: aids/Chain_3/uf_Q_5_2.txt
Bound Time: 0.016777858
Exact Size Time: 1.121023268
Exact Size: 120348
GCare Exact Size: 120348
Bound [Lower, Avg, Upper]: [0.0, 316627.2812362647, 1.489136846e9]
Relative Error [Lower, Avg, Upper]: [0.0, 2.63093097713518, 12373.590304782796]
Query: aids/Chain_3/uf_Q_5_3.txt
Bound Time: 0.010777153
Exact Size Time: 1.194008968
Exact Size: 120348
GCare Exact Size: 120348
Bound [Lower, Avg, Upper]: [0.0, 316627.281236265, 1.489136846e9]
Relative Error [Lower, Avg, Upper]: [0.0, 2.6309309771351828, 12373.590304782796]
