Skip to content

Commit

Permalink
treat single-value in-vars like literals, update TPC-H queries to use…
Browse files Browse the repository at this point in the history
… `:in`, add `query-plan-for` helper, fixes #1556
  • Loading branch information
jarohen committed Feb 23, 2022
1 parent 7ae9320 commit 6245d76
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 57 deletions.
12 changes: 7 additions & 5 deletions bench/src/xtdb/bench/tpch.clj
@@ -1,12 +1,14 @@
(ns xtdb.bench.tpch
(:require [xtdb.api :as xt]
[xtdb.bench :as bench]
[xtdb.fixtures.tpch :as tpch]))
(:require [xtdb.bench :as bench]
[xtdb.fixtures.tpch :as tpch]
[xtdb.api :as xt]))

(defn run-tpch-query [node n]
(xt/q (xt/db node) (assoc (get tpch/tpch-queries (dec n)) :timeout 120000)))
(tpch/run-query (xt/db node)
(-> (get tpch/tpch-queries (dec n))
(assoc :timeout 120000))))

(defn run-tpch-queries [node {:keys [scale-factor] :as opts}]
(defn run-tpch-queries [node {:keys [scale-factor]}]
(every? true? (for [n (range 1 23)]
(let [actual (run-tpch-query node n)]
(if (= 0.01 scale-factor)
Expand Down
25 changes: 16 additions & 9 deletions core/src/xtdb/query.clj
Expand Up @@ -1182,7 +1182,6 @@
(defn- triple-join-order [type->clauses in-var-cardinalities stats]
;; TODO make more use of in-var-cardinalities
(let [[type->clauses project-only-leaf-vars] (expand-leaf-preds type->clauses (set (keys in-var-cardinalities)) stats)

{triple-clauses :triple, range-clauses :range, pred-clauses :pred} type->clauses

collected-vars (collect-vars type->clauses)
Expand All @@ -1200,6 +1199,17 @@
:when (logic-var? sym)]
sym))

var->clauses (merge-with into
(group-by :v triple-clauses)
(group-by :e triple-clauses))

literal-vars (->> (keys var->clauses)
(into #{} (filter literal?)))

single-value-in-vars (->> in-var-cardinalities
(into #{} (comp (filter #(= (double (val %)) 1.0))
(map key))))

cardinality-for-var (fn [var cardinality]
(cond-> (double (cond
(literal? var) 0.0
Expand All @@ -1222,31 +1232,28 @@
update-cardinality (fn [acc {:keys [e a v] :as clause}]
(let [{:keys [self-join? ignore-v?]} (meta clause)
es (double (cardinality-for-var e (cond->> (double (db/eid-cardinality stats a))
(literal? v) (/ 1.0))))
(or (literal? v) (single-value-in-vars v)) (/ 1.0))))
vs (cond
ignore-v? Double/MAX_VALUE
self-join? (Math/nextUp es)
:else
(cardinality-for-var v (cond->> (double (db/value-cardinality stats a))
(literal? e) (/ 1.0))))]
(or (literal? e) (single-value-in-vars e)) (/ 1.0))))]
(-> acc
(update v (fnil min Double/MAX_VALUE) vs)
(update e (fnil min Double/MAX_VALUE) es))))

var->cardinality (doto (reduce update-cardinality {} triple-clauses)
(->> (log/debug :triple-joins-var->cardinality)))

var->clauses (merge-with into
(group-by :v triple-clauses)
(group-by :e triple-clauses))
start-vars (set/union literal-vars single-value-in-vars)

triple-clause-var-order (loop [vars (filter logic-var? (map key (sort-by val var->cardinality)))
join-order (->> (keys var->clauses) (filter literal?) set vec)
triple-clause-var-order (loop [vars (->> (sort-by val var->cardinality) (map key) (filter logic-var?) (remove #(contains? start-vars %)))
join-order (vec start-vars)
reachable-var-groups (list)]
(if-not (seq vars)
(vec (distinct join-order))


(let [var (first (or (not-empty (for [reachable-var-group reachable-var-groups
var (->> (filter reachable-var-group vars)
(partition-by var->cardinality)
Expand Down
13 changes: 7 additions & 6 deletions dev/dev.clj
Expand Up @@ -96,13 +96,14 @@
(tpch/load-docs! (dev/xtdb-node) 0.05)

(time
(count (xt/q (xt/db (xtdb-node))
(-> tpch/q5
(assoc :timeout 120000)))))
(count
(tpch/run-query (xt/db (xtdb-node))
(-> tpch/q5
(assoc :timeout 120000)))))

(time
(doseq [q tpch/tpch-queries]
(time
(xt/q (xt/db (xtdb-node))
(-> q
(assoc :timeout 120000)))))))
(tpch/run-query (xt/db (xtdb-node))
(-> q
(assoc :timeout 120000)))))))
4 changes: 2 additions & 2 deletions test/resources/xtdb/fixtures/tpch/current-join-orders.edn
@@ -1,8 +1,8 @@
[l_shipdate l l_linestatus l_tax l_returnflag l_extendedprice l_quantity l_discount]
[15 "EUROPE" p p_type ps s n r ps_supplycost s_comment s_name s_address s_phone s_acctbal n_name p_mfgr]
["BUILDING" c o o_orderdate l l_shipdate l_extendedprice o_shippriority l_discount]
[?segment c o o_orderdate l l_shipdate l_extendedprice o_shippriority l_discount]
[o_orderdate o o_orderpriority]
["ASIA" r n s l o o_orderdate c n_name l_extendedprice l_discount]
[?region r n s l o o_orderdate c n_name l_extendedprice l_discount]
[l_discount l l_quantity l_shipdate l_extendedprice]
[l_shipdate l_year l s n1 supp_nation o c n2 cust_nation l_extendedprice l_discount]
[brazil_volume volume o_year mkt_share]
Expand Down
80 changes: 47 additions & 33 deletions test/src/xtdb/fixtures/tpch.clj
Expand Up @@ -5,7 +5,8 @@
[clojure.string :as str]
[clojure.test :as t]
[xtdb.api :as xt]
[xtdb.fixtures :as fix :refer [*api*]])
[xtdb.fixtures :as fix :refer [*api*]]
[xtdb.query :as q])
(:import (io.airlift.tpch GenerateUtils TpchColumn TpchColumnType$Base TpchEntity TpchTable)
(java.util Date)))

Expand Down Expand Up @@ -94,6 +95,15 @@
(defn load-docs!
  "Calls `submit-docs!` with `node` followed by `args`, then passes the
  result to `xt/await-tx` on the same node and returns what that returns."
  [node & args]
  (let [submitted (apply submit-docs! node args)]
    (xt/await-tx node submitted)))

(defn- with-in-args
  "Returns `q` with `in-args` stored under the `::in-args` key of its
  metadata. The query value itself is left untouched, and any metadata
  already on `q` is kept."
  [q in-args]
  (vary-meta q assoc ::in-args in-args))

(defn run-query
  "Runs `q` against `db` with `xt/q`, applying whatever is stored under
  `::in-args` in the query's metadata as the trailing arguments. When the
  metadata is absent, `xt/q` is called with just `db` and `q`."
  [db q]
  (let [in-args (-> q meta ::in-args)]
    (apply xt/q db q in-args)))

(defn query-plan-for
  "Calls `q/query-plan-for` with `db`, `q` and the value stored under
  `::in-args` in the query's metadata (nil when no such metadata is set)."
  [db q]
  (let [in-args (-> q meta ::in-args)]
    (q/query-plan-for db q in-args)))

;; NOTE: timings below are hot/cold, on my machine (Ryzen 7 5800X, 16GB RAM)
;; SF 0.05, against commit `d4437676`, 2022-02-04
;; they're not particularly scientifically measured, so worth not paying too much attention
Expand Down Expand Up @@ -169,23 +179,25 @@
;; "Elapsed time: 1355.255634 msecs"
;; "Elapsed time: 764.763165 msecs"
(def q3
'{:find [o
(sum (* l_extendedprice (- 1 l_discount)))
o_orderdate
o_shippriority]
:where [[c :c_mktsegment "BUILDING"]
[o :o_custkey c]
[o :o_shippriority o_shippriority]
[o :o_orderdate o_orderdate]
[(< o_orderdate #inst "1995-03-15")]
[l :l_orderkey o]
[l :l_discount l_discount]
[l :l_extendedprice l_extendedprice]
[l :l_shipdate l_shipdate]
[(> l_shipdate #inst "1995-03-15")]]
:order-by [[(sum (* l_extendedprice (- 1 l_discount))) :desc]
[o_orderdate :asc]]
:limit 10})
(-> '{:find [o
(sum (* l_extendedprice (- 1 l_discount)))
o_orderdate
o_shippriority]
:in [?segment]
:where [[c :c_mktsegment ?segment]
[o :o_custkey c]
[o :o_shippriority o_shippriority]
[o :o_orderdate o_orderdate]
[(< o_orderdate #inst "1995-03-15")]
[l :l_orderkey o]
[l :l_discount l_discount]
[l :l_extendedprice l_extendedprice]
[l :l_shipdate l_shipdate]
[(> l_shipdate #inst "1995-03-15")]]
:order-by [[(sum (* l_extendedprice (- 1 l_discount))) :desc]
[o_orderdate :asc]]
:limit 10}
(with-in-args ["BUILDING"])))

;; "Elapsed time: 621.653381 msecs"
;; "Elapsed time: 262.517773 msecs"
Expand All @@ -206,21 +218,23 @@
;; "Elapsed time: 3365.050276 msecs"
;; "Elapsed time: 1927.300129 msecs"
(def q5
'{:find [n_name (sum (* l_extendedprice (- 1 l_discount)))]
:where [[o :o_custkey c]
[l :l_orderkey o]
[l :l_suppkey s]
[s :s_nationkey n]
[c :c_nationkey n]
[n :n_name n_name]
[n :n_regionkey r]
[r :r_name "ASIA"]
[l :l_extendedprice l_extendedprice]
[l :l_discount l_discount]
[o :o_orderdate o_orderdate]
[(>= o_orderdate #inst "1994-01-01")]
[(< o_orderdate #inst "1995-01-01")]]
:order-by [[(sum (* l_extendedprice (- 1 l_discount))) :desc]]})
(-> '{:find [n_name (sum (* l_extendedprice (- 1 l_discount)))]
:in [?region]
:where [[o :o_custkey c]
[l :l_orderkey o]
[l :l_suppkey s]
[s :s_nationkey n]
[c :c_nationkey n]
[n :n_name n_name]
[n :n_regionkey r]
[r :r_name ?region]
[l :l_extendedprice l_extendedprice]
[l :l_discount l_discount]
[o :o_orderdate o_orderdate]
[(>= o_orderdate #inst "1994-01-01")]
[(< o_orderdate #inst "1995-01-01")]]
:order-by [[(sum (* l_extendedprice (- 1 l_discount))) :desc]]}
(with-in-args ["ASIA"])))

;; "Elapsed time: 995.197119 msecs"
;; "Elapsed time: 963.57298 msecs"
Expand Down
21 changes: 19 additions & 2 deletions test/test/xtdb/query_test.clj
Expand Up @@ -3625,6 +3625,23 @@
:vars-in-join-order
(filter #{'m 'e}))))))

;; Checks that, with a single value supplied for the `v` in-var, the query
;; plan places `v` ahead of `e` in `:vars-in-join-order`.
(t/deftest prioritises-single-value-in-vars-like-literals-1556
  ;; For each x in 0..999, put one `:thing` and one `:other-thing` doc
  ;; carrying `:v x`.
  (fix/submit+await-tx (mapcat (fn [x]
                                 [[::xt/put {:xt/id (str "thing-" x), :type :thing, :v x}]
                                  [::xt/put {:xt/id (str "other-thing-" x), :type :other-thing, :v x}]])
                               (range 1000)))

  (let [db (xt/db *api*)
        plan (q/query-plan-for db
                               '{:find [e]
                                 :in [v]
                                 :where [[e :type :thing]
                                         [e :v v]]}
                               [590])
        ;; only the relative order of `e` and `v` matters here
        join-order (filter #{'e 'v} (:vars-in-join-order plan))]
    (t/is (= '[v e] join-order))))

(t/deftest test-binds-against-false-arg-885
(fix/submit+await-tx [[::xt/put {:xt/id :foo, :name "foo", :flag? false}]
[::xt/put {:xt/id :bar, :name "bar", :flag? true}]
Expand Down Expand Up @@ -3884,7 +3901,7 @@
[::xt/put {:xt/id (UUID/randomUUID)
:type "extra type"}]))

(t/is (= '[?name ?e ?type]
(t/is (= '[?type ?name ?e]
(-> (q/query-plan-for (xt/db *api*) query
["person-104" :person])
:vars-in-join-order)))
Expand All @@ -3894,7 +3911,7 @@
[::xt/put {:xt/id (UUID/randomUUID)
:name "another extra name"}]])

(t/is (= '[?name ?e ?type]
(t/is (= '[?type ?name ?e]
(-> (q/query-plan-for (xt/db *api*) query
["person-104" :person])
:vars-in-join-order)))))
Expand Down

0 comments on commit 6245d76

Please sign in to comment.