In [24]:
include("Source/UnlabeledCardinalityEstimator.jl")
using Distributions
using DataStructures: counter, Dict, Set, Vector, inc!

First, we generate a moderately sized random graph whose edge endpoints are drawn from a Zipfian distribution (exponent 0.5) over the vertices.

In [25]:
# Build a random graph whose edge endpoints follow a Zipf-like law.
n = 20000            # number of (source, target) endpoint pairs to sample
numVertices = 1000   # vertices are labelled 1:numVertices

# Weight of vertex i is proportional to 1/sqrt(i); normalise to a probability vector.
zipf = [1.0 / i^0.5 for i in 1:numVertices]
zipf ./= sum(zipf)
d = DiscreteNonParametric(1:numVertices, zipf)

# The support of `d` is already 1:numVertices, so the samples are used directly.
# Reducing them modulo numVertices would turn the label `numVertices` into 0,
# which is not a vertex of `Graph(numVertices)`, so every edge touching it
# would be rejected by `add_edge!` without any error.
x1 = rand(d, n)
x2 = rand(d, n)

g = Graph(numVertices)
for (src, dst) in zip(x1, x2)
    add_edge!(g, src, dst)
end

Then, we generate our lifted graph summary, which includes cardinality and min/avg/max degree information about every edge between colors.

In [26]:
# Summarise `g` for the estimator. The trailing semicolon keeps the notebook
# from printing the (large) summary object.
# NOTE(review): the second argument (16) is presumably the number of colors; confirm
# against generate_color_summary's definition.
summary = generate_color_summary(g, 16);

Lastly, we create a simple acyclic query graph, a star with one short leg attached, and compare our bounds against the exact result.

In [27]:
# Five-vertex directed query pattern: vertex 2 is the hub of a star touching
# vertices 1, 3 and 4, and the extra edge 4 -> 5 is the leg hanging off the star.
queryGraph = DiGraph(5)
add_edge!(queryGraph, 1, 2)
add_edge!(queryGraph, 2, 3)
add_edge!(queryGraph, 2, 4)
add_edge!(queryGraph, 4, 5)

true

In [28]:
# Bounds for `queryGraph` computed from the color summary with partial sums disabled.
# The result is indexed 1..3 and is reported later as the lower, average and upper bound.
bounds_without_partial_agg = get_cardinality_bounds(queryGraph, summary; use_partial_sums = false)

3-element Vector{Float64}:
 4.273825624e9
 1.6976469286123102e10
 6.936907186e10

In [29]:
# Same query and summary as the previous call, this time with partial sums enabled,
# so the two results can be compared element by element.
bounds_with_partial_agg = get_cardinality_bounds(queryGraph, summary; use_partial_sums = true)

3-element Vector{Float64}:
 4.273825624e9
 1.69764692861239e10
 6.936907186e10

In [30]:
# Sanity check: the element-wise ratio of the two results should be 1.0 everywhere,
# i.e. partial summation must not change the bounds beyond floating-point rounding.
bounds_without_partial_agg ./ bounds_with_partial_agg

3-element Vector{Float64}:
 1.0
 0.999999999999953
 1.0

In [34]:
# Ground-truth result size of `queryGraph` on the data graph `g`.
# `only` unwraps the single element of the returned collection and throws if the
# collection does not contain exactly one element.
exact_size = only(get_exact_size(queryGraph, g))

1.7671470509e10

In [35]:
# Report each bound divided by the exact size.
# NOTE: despite the "Relative Error" label, the printed number is the ratio
# bound / exact (1.0 means a perfect estimate), not |bound - exact| / exact.
for (label, position) in (
    "Relative Error (Lower): " => 1,
    "Relative Error (Avg): " => 2,
    "Relative Error (Upper): " => 3,
)
    println(label, only(bounds_with_partial_agg[position]) / exact_size)
end

Relative Error (Lower): 0.2418488954738294
Relative Error (Avg): 0.9606710023072421
Relative Error (Upper): 3.9254838370508356
