-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.cljc
236 lines (206 loc) · 7.61 KB
/
parse.cljc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
(ns com.yetanalytics.pathetic.parse
"JSONPath parsing, alongside parse validation and parsed path -> string
functions."
(:require #?(:clj [clojure.core.match :as m]
:cljs [cljs.core.match :as m])
[clojure.spec.alpha :as s]
[clojure.string :as cstr]
[clojure.walk :as w]
[instaparse.core :as insta]
[com.yetanalytics.pathetic.path :as path]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Specs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(s/def :failure/tag keyword?)
(s/def :failure/expecting any?) ;; Just need that keyword to exist
(s/def :failure/reason (s/coll-of (s/keys :req-un [:failure/tag
:failure/expecting])))
(s/def :failure/index int?)
(s/def ::parse-failure (s/keys :req-un [:failure/index
:failure/reason]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Parser
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Grammar inspired from:
;; https://github.com/dchester/jsonpath/blob/master/lib/grammar.js
;;
;; Integer and string literal regexes inspired from:
;; https://github.com/dchester/jsonpath/blob/master/lib/dict.js
(def jsonpath-instaparser
(insta/parser
"jsonpaths := <ws> jsonpath (<ws> <'|'> <ws> jsonpath)* <ws>;
jsonpath := <root> children?;
<children> := child+;
child := bracket-child | dot-child;
<bracket-child> := double-dot? <'['> <ws> bracket-content <ws> <']'>;
<bracket-content> := wildcard | bracket-union;
bracket-union := union-element (<ws> <','> <ws> union-element)*;
<union-element> := int-literal | string-literal | array-slice;
array-slice := int-literal? ':' int-literal? (':' int-literal?)?;
<dot-child> := double-dot child-body | <dot> child-body;
<child-body> := wildcard | identifier;
identifier := #'[a-zA-Z0-9_\\-]+';
int-literal := #'-?[0-9]+';
string-literal := string-literal-sq | string-literal-dq
<string-literal-sq> := #'\\'(?:\\\\[\\'bfnrt/\\\\]|\\\\u[a-fA-F0-9]{4}|[^\\'\\\\])*\\'';
<string-literal-dq> := #'\"(?:\\\\[\"bfnrt/\\\\]|\\\\u[a-fA-F0-9]{4}|[^\"\\\\])*\"'
root := '$';
wildcard := '*';
double-dot := '..';
dot := '.';
ws := #'\\s*'
"))
#_{:clj-kondo/ignore [:unresolved-symbol]}
(defn- slice-list->slice-map [slice-list]
;; "start", "end", and "step" are singleton vectors as a result of w/postwalk
(m/match [slice-list]
[[":"]]
{:start :vec-lower :end :vec-higher :step 1}
[[":" ":"]]
{:start :vec-lower :end :vec-higher :step 1}
[[[start] ":"]]
{:start start :end :vec-higher :step 1}
[[":" [end]]]
{:start :vec-lower :end end :step 1}
[[[start] ":" [end]]]
{:start start :end end :step 1}
[[[start] ":" [end] ":"]]
{:start start :end end :step 1}
;; Variable steps
[[":" ":" [step]]]
(if (nat-int? step)
{:start :vec-lower :end :vec-higher :step step}
{:start :vec-higher :end :vec-lower :step step})
[[[start] ":" ":" [step]]]
(if (nat-int? step)
{:start start :end :vec-higher :step step}
{:start start :end :vec-lower :step step})
[[":" [end] ":" [step]]]
(if (nat-int? step)
{:start :vec-lower :end end :step step}
{:start :vec-higher :end end :step step})
[[[start] ":" [end] ":" [step]]]
{:start start :end end :step step}
:else
(throw (ex-info "Cannot process array slice"
{:type ::invalid-array-slice
:array-slice slice-list}))))
(defn- unquote-str [s]
;; Assume that quotes are symmetrical
(cond
;; \"foo\" or \'foo\'
(or (= \' (first s)) (= \" (first s)))
(subs s 1 (-> s count dec))
;; \\'foo\\'
(and (= \\ (first s)) (= \' (second s)))
(subs s 2 (-> s count dec dec))
;; foo
:else
s))
(defn- str->int
[int-str]
#?(:clj (Long/parseLong int-str)
:cljs (js/parseInt int-str)))
(defn- instaparse-node->pathetic
[parsed]
(if (coll? parsed)
(case (first parsed)
:jsonpaths (->> parsed rest vec)
:jsonpath (->> parsed rest (apply concat) vec)
:child (->> parsed rest vec)
:bracket-union (-> parsed rest flatten vec)
;; Child nodes
:array-slice (slice-list->slice-map (apply vector (rest parsed)))
:identifier [(second parsed)]
:string-literal [(-> parsed second unquote-str)]
:int-literal [(-> parsed second str->int)]
:wildcard '*
:double-dot '..)
parsed))
(defn instaparse->pathetic
[parsed]
(w/postwalk instaparse-node->pathetic parsed))
;; Mini-monad to deal with error threading
(defn- parse-bind
[m f]
(if-not (s/valid? ::parse-failure m) (f m) m))
(s/fdef parse
:args (s/cat :path string?)
:ret (s/or :success ::path/paths
:failure ::parse-failure))
(defn parse
"Given a JSON-path, parse it into data. Returns a vector of parsed
JSON-paths, or the first error map if one or more paths are
invalid."
[jsonpath-str]
(let [parse-res (insta/parse jsonpath-instaparser jsonpath-str)]
(parse-bind parse-res instaparse->pathetic)))
(s/fdef parse-first
:args (s/cat :path string?)
:ret (s/or :success ::path/path
:failure ::parse-failure))
(defn parse-first
"Same as `parse`, but returns the first parsed JSON-path, or `nil`
if the paths are invalid."
[jsonpath-str]
(let [parse-res (parse jsonpath-str)]
(parse-bind parse-res first)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Validation Functions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(s/fdef is-parse-failure?
:args (s/cat :x (s/or :success ::path/paths
:failure ::parse-failure))
:ret boolean?)
(defn is-parse-failure?
"Returns true if the given object is error data from a parse
failure, false otherwise."
[x]
(s/valid? ::parse-failure x))
(s/fdef test-strict-path
:args (s/cat :parsed-path ::path/path)
:ret (s/nilable ::path/element))
(defn test-strict-path
"Test if a parsed path is valid in strict mode. If so, returns
nil; if not, then returns the first non-strict element, which
is any one of the following:
- Recursive descent operator (\"..\")
- Array slicess
- Negative array indices"
[parsed-path]
(->> parsed-path
(filter (fn [e] (not (s/valid? ::path/strict-element e))))
first))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Parse Undo
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(s/fdef path->string
:args (s/cat :parsed-path ::path/path)
:ret string?
:fn (fn [{:keys [args ret]}] (= (:parsed-path args) (parse-first ret))))
(defn- sub-element->str
[sub-element]
(cond
;; Array slice
(map? sub-element)
(let [{:keys [start end step]} sub-element
start (if (keyword? start) nil start)
end (if (keyword? end) nil end)]
(str start ":" end ":" step))
;; String key
(string? sub-element)
(str "'" sub-element "'")
;; Integer key
:else
(str sub-element)))
(defn- element->str
[element]
(cond
(= '* element) "[*]"
(= '.. element) ".."
:else (str "[" (->> element (map sub-element->str) (cstr/join ",")) "]")))
(defn path->string
"Stringify a parsed path back into a JSONPath string, using brackets
for all keys."
[parsed-path]
(->> parsed-path (map element->str) cstr/join (str "$")))