-
Notifications
You must be signed in to change notification settings - Fork 4
/
bnode.cljc
158 lines (145 loc) · 5.67 KB
/
bnode.cljc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
(ns com.yetanalytics.flint.validate.bnode
(:require [clojure.set :as cset]
[clojure.zip :as zip]))
(def bgp-dividers
#{:where/recurse :where/union :where/optional
:where/minus :where/graph :where/service
:where/service-silent :where/bind :where/values})
(defn- get-parent-loc
"Given a bnode's loc, return its parent (either a `:triple/vec`
or `:triple/nform` node)."
[loc]
(let [penultimate (-> loc zip/path last first)]
(case penultimate
:triple/vec
(-> loc ; [:ax/bnode ...]
zip/up ; [:triple/vec ...]
)
:triple/o
(-> loc ; [:ax/bnode ...]
zip/up ; [:triple/o ...]
zip/up ; [:triple/po ...]
zip/up ; [:triple/spo ...]
zip/up ; [:triple/nform ...]
)
:triple/spo
(-> loc ; [:ax/bnode ...]
zip/up ; [:triple/spo ...]
zip/up ; [:triple/nform ...]
))))
(defn- get-bgp-index
"The BGP path is the regular zip loc path appended with an index that
corresponds to that of the BGP in the WHERE vector. For example:
[:triple/vec ...] => 0
[:triple/nform ...] => 0
[:where/filter ...] => 0 ; FILTERs don't divide BGPs
[:where/optional ...] => X ; BGP divider
[:triple/nform ...] => 1
Note that this only works with locs that are immediate children
of `:where-sub/where` nodes.
"
[loc]
(let [lefts (zip/lefts loc)]
(count (filter (fn [[k _]] (bgp-dividers k)) lefts))))
(defn- get-where-index
[pnode nnode]
(let [indexed (map-indexed (fn [i x] [x i]) (second pnode))]
(some (fn [[x i]] (when (= x nnode) i)) indexed)))
(defn- annotated-path
"Create a coll of AST keywords where all `:where-sub/where`s are
followed by indices, either the index in the WHERE vector or the
index of the BGP (for the very last one)."
[loc]
(let [parent-loc (get-parent-loc loc)]
(loop [zip-path (zip/path parent-loc)
res-path []]
(let [?pnode (first zip-path)
?nnode (second zip-path)]
(cond
(and ?pnode
?nnode
(= :where-sub/where (first ?pnode)))
(recur (rest zip-path)
(conj res-path (first ?pnode) (get-where-index ?pnode ?nnode)))
(and ?pnode
(not ?nnode)
(= :where-sub/where (first ?pnode)))
(recur (rest zip-path)
(conj res-path (first ?pnode) (get-bgp-index parent-loc)))
?pnode
(recur (rest zip-path)
(conj res-path (first ?pnode)))
:else
res-path)))))
(defn- valid-bnode-locs?
"Given `locs`, return `false` if `bnode` is duplicated across multiple
BGPs, `true` otherwise."
[[bnode locs]]
(if (<= (count locs) 1)
true ; Can't have dupe bnodes if there's only one instance :p
(let [loc-paths (map (fn [loc] (mapv first (zip/path loc))) locs)
[wh non-wh] (split-with #(some #{:where-sub/where} %) loc-paths)
?wheres (not-empty wh)
?non-wheres (not-empty non-wh)]
(cond
;; Blank nodes only exist in a non-WHERE clause (e.g. CONSTRUCT,
;; INSERT DATA, or INSERT). Since only one such clause may exist
;; in a Query or Update, and since each counts as a single BGP,
;; we are done.
(and (not ?wheres)
?non-wheres)
true
;; Blank nodes exist in both a WHERE and non-WHERE clause. Since
;; those automatically count as two different BGPs, we are done.
(and ?wheres
?non-wheres)
false
;; Blank nodes only exist in WHERE clauses. They may all be in one
;; or more BGP, so we need to investigate further.
(and ?wheres
(not ?non-wheres))
(let [bgp-paths (map annotated-path locs)]
(apply = bgp-paths))
:else
(throw (ex-info "Blank nodes located in invalid locations!"
{:kind ::invalid-bnode-loc
:bnode bnode
:zip-locs locs}))))))
(defn- bnode-err-map
[bnode loc]
{:bnode bnode
;; Rather wasteful to call `annotated-path` twice, but this only
;; occurs during exn throwing so performance isn't a priority.
:path (annotated-path loc)})
(defn- bnode-locs->err-map
[bnode-locs]
(mapcat (fn [[bnode locs]] (map (partial bnode-err-map bnode) locs))
bnode-locs))
(defn validate-bnodes
"Given the map `node-m` between nodes and zipper locs, validate that
all bnodes satisfy the following conditions:
- They cannot be duplicated in different Basic Graph Patterns (BGPs).
- They cannot be duplicated across different Updates in a request.
Returns a pair between the union of `prev-bnodes` and the bnodes in
`node-m`, and a nilable error map."
([node-m]
(validate-bnodes #{} node-m))
([prev-bnodes node-m]
(let [bnode-locs (->> (:ax/bnode node-m)
(filter (fn [[bnode _]] (not= '_ bnode))))
new-bnodes (set (keys bnode-locs))
bnode-union (cset/union prev-bnodes new-bnodes)]
(if-some [bad-bnode-locs (->> bnode-locs
(filter (comp prev-bnodes first))
not-empty)]
[bnode-union
{:kind ::dupe-bnodes-update
:errors (bnode-locs->err-map bad-bnode-locs)
:prev-bnodes prev-bnodes}]
(if-some [bad-bnode-locs (->> bnode-locs
(filter (comp not valid-bnode-locs?))
not-empty)]
[bnode-union
{:kind ::dupe-bnodes-bgp
:errors (bnode-locs->err-map bad-bnode-locs)}]
[bnode-union nil])))))