## G-CARE Benchmark

In [None]:
using Graphs: DiGraph
using StatsPlots
include("Source/datasets.jl")
include("Source/UnlabeledCardinalityEstimator.jl")


# Each dataset lives in its own text file; the path is kept next to the call
# that loads it so a dataset can be enabled or disabled in one place.
aids_data_file_path = "dataset/aids/aids.txt"
aids_data = load_dataset(aids_data_file_path)

human_data_file_path = "dataset/human/human.txt"
human_data = load_dataset(human_data_file_path)

# The lubm80 and yago paths are defined, but loading them is currently
# disabled; uncomment the matching `load_dataset` line to include them.
lubm80_data_file_path = "dataset/lubm80/lubm80.txt"
#lubm80_data = load_dataset(lubm80_data_file_path)

yago_data_file_path = "dataset/yago/yago.txt"
#yago_data = load_dataset(yago_data_file_path)

In [None]:
# Datasets to benchmark, with display names kept in the same order
# (`dataset_names[i]` labels `datasets[i]`).
#
# Only datasets that are actually loaded may be listed here. The previous
# version referenced an undefined `yago` variable in both lists, which raised
# an UndefVarError. To benchmark all four, uncomment the lubm80/yago
# `load_dataset` calls and switch to the full lists below.
#datasets = [aids_data, human_data, lubm80_data, yago_data]
#dataset_names = ["aids", "human", "lubm80", "yago"]
datasets = [aids_data, human_data]
dataset_names = ["aids", "human"]

In [None]:
# Build one color summary per dataset and record how long each build took.
# Both vectors are filled in `datasets` order, so `build_time[k]` and
# `color_summaries[k]` belong to `datasets[k]`.
build_time = []
color_summaries = []
for data in datasets
    # `@timed` yields the expression's value first and its elapsed seconds second.
    # The second argument (32) is forwarded unchanged to `generate_color_summary`;
    # its meaning is defined there.
    timing = @timed generate_color_summary(data, 32)
    push!(color_summaries, timing[1])
    push!(build_time, timing[2])
end

In [None]:
# Directed 3-cycle: 1 -> 2 -> 3 -> 1.
triangle_query = DiGraph(3)
add_edge!(triangle_query, 1, 2)
add_edge!(triangle_query, 2, 3)
add_edge!(triangle_query, 3, 1)

# Directed path on four vertices: 1 -> 2 -> 3 -> 4.
chain_query = DiGraph(4)
add_edge!(chain_query, 1, 2)
add_edge!(chain_query, 2, 3)
add_edge!(chain_query, 3, 4)

# Directed tree on five vertices: 1 -> 2, then 2 branches to 3 and to 4,
# and 4 continues to 5.
tree_query = DiGraph(5)
add_edge!(tree_query, 1, 2)
add_edge!(tree_query, 2, 3)
add_edge!(tree_query, 2, 4)
add_edge!(tree_query, 4, 5)

In [None]:
# Print a benchmark report for every dataset: the summary build time, then,
# for each query shape, the time to compute the cardinality bounds, the time to
# compute the exact result size, the bounds themselves, and the bounds divided
# element-wise by the exact size.
#
# `datasets`, `dataset_names`, `build_time` and `color_summaries` are indexed in
# parallel, so they must all have been built from the same dataset list.
for i in eachindex(datasets)
    println("Dataset:", dataset_names[i])
    println("Summary Build Time:", build_time[i])

    # The same measurements are taken for each query; only the label differs.
    for (label, query) in (
        ("Triangle", triangle_query),
        ("Chain", chain_query),
        ("Tree", tree_query),
    )
        # `@timed` yields the expression's value at index 1 and the elapsed
        # seconds at index 2.
        bound_results = @timed get_cardinality_bounds(query, color_summaries[i])
        exact_size_results = @timed get_exact_size(query, datasets[i])

        println(label, " Bound Time: ", bound_results[2])
        println(label, " Exact Size Time: ", exact_size_results[2])
        println(label, " Bound [Lower, Avg, Upper]: ", bound_results[1])
        # Reported as "Relative Error", but the value printed is the ratio
        # bound / exact size (1.0 means the bound matches the exact size).
        println(
            label,
            " Relative Error [Lower, Avg, Upper]: ",
            bound_results[1] ./ exact_size_results[1],
        )
    end
end