In [58]:
include("Source/UnlabeledCardinalityEstimator.jl")
using Distributions
using DataStructures: counter, Dict, Set, Vector, inc!
using Plots

First, we generate a zipfian graph of moderate size.

In [7]:
n = 200000
numVertices = 10000
zipf = [1.0/(i^.5) for i in 1:numVertices]
zipf = zipf ./ sum(zipf)
d = DiscreteNonParametric(1:numVertices, zipf)
x1 = rand(d, n) .% numVertices
x2 = rand(d, n) .% numVertices
g = Graph(numVertices)
for i in range(1, length(x1))
    add_edge!(g, x1[i], x2[i])
end

Then, we generate our lifted graph summary which includes cardinality and min/avg/max degree information about every edge between colors.

In [11]:
summary = generate_color_summary(g, 32)
nothing

Lastly, we create a simple acyclic query graph composed of a star with a small leg and see how our bounds compare with the exact result.

In [12]:
query_graph = DiGraph(4)
add_edge!(query_graph, (1,2))
add_edge!(query_graph, (2,3))
add_edge!(query_graph, (3,1))
add_edge!(query_graph, (4,1))

true

In [13]:
bounds_without_partial_agg = get_cardinality_bounds(query_graph, summary; use_partial_sums = false, try_all_starting_nodes=false, verbose=false)

3-element Vector{Float64}:
 0.0
 1.9284923198480654e8
 1.9890786775e10

In [14]:
bounds_with_partial_agg = get_cardinality_bounds(query_graph, summary; use_partial_sums = true, try_all_starting_nodes=false, verbose=false)

3-element Vector{Float64}:
 0.0
 1.928492319848064e8
 1.9890786775e10

In [15]:
bounds_without_partial_agg ./ bounds_with_partial_agg # Check that the partial summation doesn't affect the output (beyond floating point issues)

3-element Vector{Float64}:
 NaN
   1.0000000000000007
   1.0

In [16]:
exact_size = only(get_exact_size(query_graph, g; verbose=false))

2.122302e8

In [17]:
println("Relative Error (Lower): ", only(bounds_with_partial_agg[1])/exact_size)
println("Relative Error (Avg): ", only(bounds_with_partial_agg[2])/exact_size)
println("Relative Error (Upper): ", only(bounds_with_partial_agg[3])/exact_size)

Relative Error (Lower): 0.0
Relative Error (Avg): 0.9086794998299318
Relative Error (Upper): 93.72269721745539


In [59]:
query_graph = DiGraph(3)
add_edge!(query_graph, (1,2))
add_edge!(query_graph, (2,3))
add_edge!(query_graph, (3,1))

bounds = []
exact_sizes = []
graph_sizes = []
for i in range(1, 12)
    numVertices = 5*2^i
    push!(graph_sizes, numVertices)
    n = numVertices*5
    zipf = [1.0/(i^.5) for i in 1:numVertices]
    zipf = zipf ./ sum(zipf)
    d = DiscreteNonParametric(1:numVertices, zipf)
    x1 = rand(d, n) .% numVertices
    x2 = rand(d, n) .% numVertices
    g = Graph(numVertices)
    for i in range(1, length(x1))
        add_edge!(g, x1[i], x2[i])
    end
    summary = generate_color_summary(g, 64)
    bounds_with_partial_agg = get_cardinality_bounds(query_graph, summary; use_partial_sums = true, try_all_starting_nodes=true, verbose=false)
    push!(bounds, bounds_with_partial_agg)
    exact_size = only(get_exact_size(query_graph, g; verbose=false))
    push!(exact_sizes, exact_size)
end

In [60]:
bounds

12-element Vector{Any}:
 [0.0, 171.0, 279.0]
 [0.0, 483.0, 1120.0]
 [0.0, 1109.0, 3417.0]
 [0.0, 1572.2307956104223, 9447.0]
 [0.0, 1952.4000137607345, 36369.0]
 [0.0, 2813.516113705718, 122116.0]
 [0.0, 3815.440652754484, 346752.0]
 [0.0, 4980.663248177357, 914500.0]
 [0.0, 6231.386440295614, 2.241545e6]
 [0.0, 7089.77616417361, 5.224294e6]
 [0.0, 8360.377276053316, 1.1937213e7]
 [0.0, 11470.500192889385, 2.5726497e7]

In [61]:
exact_sizes

12-element Vector{Any}:
   171.0
   483.0
  1109.0
  1600.0
  2228.0
  3467.0
  5556.0
  7776.0
  9983.0
 11816.0
 14092.0
 21347.0

In [62]:
bounds ./ exact_sizes

12-element Vector{Vector{Float64}}:
 [0.0, 1.0, 1.631578947368421]
 [0.0, 1.0, 2.318840579710145]
 [0.0, 1.0, 3.0811541929666366]
 [0.0, 0.982644247256514, 5.904375]
 [0.0, 0.876301621975195, 16.323608617594253]
 [0.0, 0.8115131565346749, 35.22238246322469]
 [0.0, 0.6867243795454434, 62.41036717062635]
 [0.0, 0.6405173930269235, 117.60545267489712]
 [0.0, 0.6241997836617864, 224.5362115596514]
 [0.0, 0.6000149089517273, 442.13727149627624]
 [0.0, 0.5932711663392929, 847.0914703377803]
 [0.0, 0.537335466008778, 1205.157492856139]

In [None]:
bounds ./ exact_sizes

14-element Vector{Vector{Float64}}:
 [0.0, 1.0, 2.736842105263158]
 [0.0, 1.0, 7.333333333333333]
 [0.0, 1.0, 15.53191489361702]
 [0.0, 0.9875548245614034, 9.663157894736843]
 [0.0, 0.9107107108676952, 28.057971014492754]
 [0.0, 1.0676232023224979, 86.01661129568106]
 [0.0, 0.977584544783465, 276.12635379061373]
 [0.0, 0.48224728675253825, 228.56622851365015]
 [0.0, 0.6498646765908367, 708.6459627329192]
 [0.0, 0.41313033756326, 790.8246789503071]
 [0.0, 0.7409502174557998, 3081.6657169990503]
 [0.0, 0.6118814620470446, 4409.881546134663]
 [0.0, 0.7739815683643275, 10752.05462184874]
 [0.0, 0.2866898964345087, 7660.481024234111]

In [113]:
lower_bounds = max.(.001, [x[1] for x in bounds]./exact_sizes)
avg_bounds = [x[2] for x in bounds]./exact_sizes
upper_bounds = [x[3] for x in bounds]./exact_sizes
plot(graph_sizes, [lower_bounds avg_bounds upper_bounds], title="Triangle Query", 
label = ["Lower" "Avg" "Upper"])
plot!(xscale=:log10, yscale=:log10)
xlims!(1e+0, 1e+4)
ylims!(1e-4, 1e+3)
title!("Log-log plot")
xlabel!("Graph Size")
ylabel!("Relative Error")

AssertionError: AssertionError: total_plotarea_horizontal > 0mm

In [111]:
lower_bounds

12-element Vector{Float64}:
 0.001
 0.001
 0.001
 0.001
 0.001
 0.001
 0.001
 0.001
 0.001
 0.001
 0.001
 0.001

In [65]:
graph_sizes

12-element Vector{Any}:
    10
    20
    40
    80
   160
   320
   640
  1280
  2560
  5120
 10240
 20480