In [96]:
include("Source/UnlabeledCardinalityEstimator.jl")
using Distributions
using DataStructures: counter, Dict, Set, Vector, inc!

First, we generate a zipfian graph of moderate size.

In [63]:
n = 2000
numVertices = 1000
zipf = [1.0/(i^.5) for i in 1:numVertices]
zipf = zipf ./ sum(zipf)
d = DiscreteNonParametric(1:numVertices, zipf)
x1 = rand(d, n) .% numVertices
x2 = rand(d, n) .% numVertices
g = Graph(numVertices)
for i in range(1, length(x1))
    add_edge!(g, x1[i], x2[i])
end

Then, we generate our lifted graph summary which includes cardinality and min/avg/max degree information about every edge between colors.

In [108]:
summary = generate_color_summary(g, 16)
nothing

Lastly, we create a simple acyclic query graph composed of a star with a small leg and see how our bounds compare with the exact result.

In [109]:
query_graph = DiGraph(4)
add_edge!(query_graph, (1,2))
add_edge!(query_graph, (2,3))
add_edge!(query_graph, (3,4))
add_edge!(query_graph, (4,1))

true

In [110]:
bounds_without_partial_agg = get_cardinality_bounds(query_graph, summary; use_partial_sums = false, verbose=false)

3-element Vector{Float64}:
    0.0
 3519.6954806435806
    3.436165e6

In [111]:
bounds_with_partial_agg = get_cardinality_bounds(query_graph, summary; use_partial_sums = true, verbose=false)

3-element Vector{Float64}:
    0.0
 3519.695480643603
    3.436165e6

In [106]:
bounds_without_partial_agg ./ bounds_with_partial_agg # Check that the partial summation doesn't affect the output (beyond floating point issues)

3-element Vector{Float64}:
 NaN
   0.39661674665559465
   0.9316213038222023

In [112]:
exact_size = only(get_exact_size(query_graph, g; verbose=false))

69894.0

In [113]:
println("Relative Error (Lower): ", only(bounds_with_partial_agg[1])/exact_size)
println("Relative Error (Avg): ", only(bounds_with_partial_agg[2])/exact_size)
println("Relative Error (Upper): ", only(bounds_with_partial_agg[3])/exact_size)

Relative Error (Lower): 0.0
Relative Error (Avg): 0.050357619833513646
Relative Error (Upper): 49.16251752654019


In [51]:
topological_sort_by_dfs(dfs_tree(query_graph, 1))

5-element Vector{Int64}:
 5
 1
 2
 4
 3

In [45]:
query_graph

{5, 4} directed simple Int64 graph