## G-CARE Benchmark

In [24]:
using Graphs: DiGraph
include("Source/datasets.jl")
include("Source/QuasiStableCardinalityEstimator.jl")


# --- aids ---------------------------------------------------------------
aids_data_file_path = "dataset/aids/aids.txt"
aids_data = load_dataset(aids_data_file_path)

# --- human --------------------------------------------------------------
human_data_file_path = "dataset/human/human.txt"
human_data = load_dataset(human_data_file_path)

# --- lubm80 / yago ------------------------------------------------------
# The paths are defined, but the loads themselves are currently disabled.
lubm80_data_file_path = "dataset/lubm80/lubm80.txt"
yago_data_file_path = "dataset/yago/yago.txt"
#lubm80_data = load_dataset(lubm80_data_file_path)
#yago_data = load_dataset(yago_data_file_path)

# End the cell on `nothing` so its value is `nothing` (presumably to keep the
# notebook from displaying the last loaded dataset).
nothing

In [16]:
# Datasets taking part in the benchmark and their display names. The two
# vectors are indexed with the same position elsewhere in this notebook, so
# entry k of `dataset_names` must name entry k of `datasets`.
# The commented-out pair below is the four-dataset variant; it refers to
# lubm80_data and yago_data, whose loads are commented out in the loading cell.
#datasets = [aids_data, human_data, lubm80_data, yago_data]
#dataset_names = ["aids", "human", "lubm80", "yago"]
datasets = [aids_data, human_data]
dataset_names = ["aids", "human"]

2-element Vector{String}:
 "aids"
 "human"

In [17]:
# Build one color summary per dataset and record, position by position:
#   build_time[k]      - elapsed seconds reported by @timed for datasets[k]
#   summary_size[k]    - result of get_color_summary_size on that summary
#   color_summaries[k] - the summary object itself
# NOTE(review): each build is timed exactly once, so the first measurement
# presumably also includes JIT compilation of generate_color_summary - confirm
# before comparing build times across datasets.
build_time = Float64[]   # @timed reports elapsed time as Float64 seconds
summary_size = []        # element type depends on get_color_summary_size (not visible here)
color_summaries = []     # summary type is defined in the included sources
for dataset in datasets
    # 32 is passed straight through as the second argument of
    # generate_color_summary (presumably the number of colors - TODO confirm).
    build_run = @timed generate_color_summary(dataset, 32)
    # Read the @timed result by field name rather than by position.
    push!(build_time, build_run.time)
    push!(summary_size, get_color_summary_size(build_run.value))
    push!(color_summaries, build_run.value)
end

In [20]:
# Triangle query on three vertices: edges 1-2, 2-3 and 3-1, each added with
# edge label 0; vertices 1, 2, 3 get the label lists [25], [10], [5].
triangle_query = PropertyGraph(3)
for edge in ((1, 2), (2, 3), (3, 1))
    add_labeled_edge!(triangle_query, edge, 0)
end
for (vertex, label) in ((1, 25), (2, 10), (3, 5))
    add_labeled_node!(triangle_query, vertex, [label])
end


# Chain query on four vertices: edges 1-2, 2-3 and 3-4, each added with edge
# label 0; vertices 1..4 get the label lists [25], [10], [5], [1].
chain_query = PropertyGraph(4)
for edge in ((1, 2), (2, 3), (3, 4))
    add_labeled_edge!(chain_query, edge, 0)
end
for (vertex, label) in ((1, 25), (2, 10), (3, 5), (4, 1))
    add_labeled_node!(chain_query, vertex, [label])
end


# Tree query on five vertices: the path 1-2-3-4 plus a branch 2-5, every edge
# added with edge label 0; vertices 1..5 get the label lists
# [25], [10], [5], [1], [12].
tree_query = PropertyGraph(5)
for edge in ((1, 2), (2, 3), (3, 4), (2, 5))
    add_labeled_edge!(tree_query, edge, 0)
end
for (vertex, label) in ((1, 25), (2, 10), (3, 5), (4, 1))
    add_labeled_node!(tree_query, vertex, [label])
end
# The final vertex is labeled outside the loop so that this call remains the
# cell's last expression and its return value is still what the cell displays.
add_labeled_node!(tree_query, 5, [12])

1-element Vector{Int64}:
 12

In [26]:
# For every dataset, run each query shape twice - once against the color
# summary (get_cardinality_bounds) and once against the raw dataset
# (get_exact_size) - and print timings, results and the bound/exact ratio.
#
# The three query shapes share one measurement-and-report body instead of
# three copies of it; the printed text and the order of measurements are the
# same as before (Triangle, then Chain, then Tree).
#
# NOTE(review): the "Relative Error" line prints bound ./ exact. When the exact
# size is 0 this yields NaN/Inf entries; decide whether such queries should be
# reported differently.
# NOTE(review): every measurement is a single @timed run, so the first call of
# each function presumably includes compilation time - confirm before reading
# the small timings as steady-state figures.
for i in eachindex(datasets)
    println("Dataset:", dataset_names[i])
    println("Summary Build Time:", build_time[i])
    for (shape, query) in (
        ("Triangle", triangle_query),
        ("Chain", chain_query),
        ("Tree", tree_query),
    )
        # @timed returns a named tuple; .value is the call's result and .time
        # the elapsed seconds.
        bound_run = @timed get_cardinality_bounds(query, color_summaries[i])
        exact_run = @timed get_exact_size(query, datasets[i])
        println(shape, " Bound Time: ", bound_run.time)
        println(shape, " Exact Size Time: ", exact_run.time)
        println(shape, " Exact Size: ", exact_run.value)
        println(shape, " Bound [Lower, Avg, Upper]: ", bound_run.value)
        println(
            shape,
            " Relative Error [Lower, Avg, Upper]: ",
            bound_run.value ./ exact_run.value,
        )
    end
end

Dataset:aids
Summary Build Time:6.532514953
Triangle Bound Time: 0.000731547
Triangle Exact Size Time: 0.052625861
Triangle Exact Size: 0
Triangle Bound [Lower, Avg, Upper]: [0.0, 1.378563852030799e-7, 143.0]
Triangle Relative Error [Lower, Avg, Upper]: [NaN, Inf, Inf]
Chain Bound Time: 0.000275533
Chain Exact Size Time: 0.04814318
Chain Exact Size: 0
Chain Bound [Lower, Avg, Upper]: [0.0, 0.00045866570543918447, 415.0]
Chain Relative Error [Lower, Avg, Upper]: [NaN, Inf, Inf]
Tree Bound Time: 0.000219413
Tree Exact Size Time: 0.05720644
Tree Exact Size: 0
Tree Bound [Lower, Avg, Upper]: [0.0, 6.997024324942911e-9, 48.0]
Tree Relative Error [Lower, Avg, Upper]: [NaN, Inf, Inf]
Dataset:human
Summary Build Time:0.388583343
Triangle Bound Time: 0.006212087
Triangle Exact Size Time: 0.000828315
Triangle Exact Size: 15
Triangle Bound [Lower, Avg, Upper]: [0.0, 3.1819957352729467, 32724.0]
Triangle Relative Error [Lower, Avg, Upper]: [0.0, 0.21213304901819643, 2181.6]
Chain Bound Time: 0.000

In [23]:
# Display the tree query's edge-label table to check how it was built: a
# nested Dict mapping a vertex to a Dict from a neighbouring vertex to the
# list of labels on that edge.
tree_query.edge_labels

Dict{Int64, Dict{Int64, Array{Int64}}} with 3 entries:
  2 => Dict(5=>[0], 3=>[0])
  3 => Dict(4=>[0])
  1 => Dict(2=>[0])