Skip to content
Newer
Older
100644 75 lines (58 sloc) 2.01 KB
0d5d3bb @youngnh added html-selector project and post
authored
1 (ns twoguys.html-selector
86c7687 @youngnh rushed impls of dom-seqs
authored
2 (:require [clojure.zip :as zip])
66ba707 @youngnh html-selector - class selectors
authored
3 (:use [clojure.string :only (split)]
4 [clojure.contrib.core :only (.?.)]
5 [clojure.java.io :only (reader)])
0d5d3bb @youngnh added html-selector project and post
authored
6 (:import [nu.validator.htmlparser.dom HtmlDocumentBuilder]
3685e03 @youngnh html-selector - element selectors
authored
7 [org.w3c.dom Document Node]
0d5d3bb @youngnh added html-selector project and post
authored
8 [org.xml.sax InputSource]))
9
(defn build-document
  "Parses the HTML found at `file-name` into a DOM document.

  `file-name` is passed to clojure.java.io/reader, so it may be anything
  that function accepts. The reader is closed as soon as parsing has
  finished, whether it succeeds or throws.

  Returns the value produced by HtmlDocumentBuilder's parse method."
  [file-name]
  ;; with-open guarantees the underlying stream is released; parse builds
  ;; the whole tree before returning, so closing afterwards is safe.
  (with-open [in (reader file-name)]
    (.parse (HtmlDocumentBuilder.) (InputSource. in))))
12
(defn id-sel
  "Selects the element identified by the selector string `id` (for example
  \"#main\").

  The first character of `id` (the sigil) is dropped and the remainder is
  looked up with getElementById on `document`.

  Returns a one-element list holding the match, or an empty list when the
  lookup yields nil, so that a following selector is never handed nil."
  [document id]
  (let [bare-id (.substring id 1)]
    (if-let [match (.getElementById document bare-id)]
      (list match)
      ())))
0d5d3bb @youngnh added html-selector project and post
authored
16
(defn nodelist-seq
  "Exposes `node-list` as a lazy sequence of its items, in index order.

  `node-list` must respond to getLength and item. The length is re-read
  each time another element is realized, and items are fetched one at a
  time starting from index 0."
  [node-list]
  ((fn walk [idx]
     (lazy-seq
       (if (>= idx (.getLength node-list))
         nil
         (cons (.item node-list idx)
               (walk (inc idx))))))
   0))
23
88ea813 @youngnh dom-seq implemented on top of lazy-cat
authored
(defn dom-seq
  "Returns a lazy sequence of every descendant of `root-node`.

  The direct children come first, followed by the dom-seq of each child in
  turn. `root-node` itself is not part of the result."
  [root-node]
  (let [kids (nodelist-seq (.getChildNodes root-node))]
    ;; mapcat over an empty `kids` contributes nothing, so a leaf node
    ;; simply yields an empty sequence.
    (concat kids
            (lazy-seq (mapcat dom-seq kids)))))
3685e03 @youngnh html-selector - element selectors
authored
30
(defn element-tagname
  "Returns the node name of `elt` when its node type is Node/ELEMENT_NODE,
  and nil for any other node type."
  [elt]
  (let [node-type (.getNodeType elt)]
    (if (= Node/ELEMENT_NODE node-type)
      (.getNodeName elt)
      nil)))
34
16a0c3c @youngnh selectors operate on single nodes
authored
(defmulti element-sel
  "Selects elements by tag name. Dispatches on `node`: the dispatch value is
  Document when `node` is an instance of Document, and Node otherwise."
  (fn [node elt-name]
    (if (instance? Document node)
      Document
      Node)))
2b9c600 @youngnh refactored to a unifying $ selector
authored
39
;; Document case: delegate to the document's own getElementsByTagName and
;; expose the resulting node list as a lazy sequence.
(defmethod element-sel Document [document elt-name]
  (-> document
      (.getElementsByTagName elt-name)
      nodelist-seq))
42
16a0c3c @youngnh selectors operate on single nodes
authored
;; Node case: walk the descendants produced by dom-seq and keep those whose
;; element tag name equals `elt-name`. Non-element nodes have a nil tag name
;; and are therefore dropped.
(defmethod element-sel Node [node elt-name]
  (for [descendant (dom-seq node)
        :when (= elt-name (element-tagname descendant))]
    descendant))
2b9c600 @youngnh refactored to a unifying $ selector
authored
45
66ba707 @youngnh html-selector - class selectors
authored
(defn get-attribute
  "Returns the value of the attribute named `attr` on `elt`.

  Yields nil as soon as any step is nil: when `elt` is nil, when
  getAttributes returns nil, or when getNamedItem finds no such attribute."
  [elt attr]
  (let [attrs (when-not (nil? elt)
                (.getAttributes elt))
        item  (when-not (nil? attrs)
                (.getNamedItem attrs attr))]
    (when-not (nil? item)
      (.getValue item))))
48
(defn hasclass?
  "Tests whether `elt` carries the class name `class`.

  Reads the \"class\" attribute through get-attribute and compares `class`
  against each whitespace-separated token in it. Tokens may be separated by
  any run of whitespace (spaces, tabs, newlines), not just a single space.

  Returns true when a token matches, and nil when none does or when the
  attribute is absent."
  [elt class]
  (when-let [class-attr (get-attribute elt "class")]
    ;; Trim first so leading whitespace does not produce an empty token.
    (some #(= class %) (split (.trim class-attr) #"\s+"))))
52
16a0c3c @youngnh selectors operate on single nodes
authored
(defn class-sel
  "Selects the descendants of `node` that carry the class named by the
  selector string `class` (for example \".nav\").

  The first character of `class` (the sigil) is dropped once, up front, and
  the remainder is matched with hasclass? against every node produced by
  dom-seq. Returns a lazy sequence of the matching nodes."
  [node class]
  ;; Strip the sigil a single time instead of once per visited node.
  (let [class-name (.substring class 1)]
    (filter #(hasclass? % class-name) (dom-seq node))))
2b9c600 @youngnh refactored to a unifying $ selector
authored
55
bca48f2 @youngnh added text-selector, calls getTextContent
authored
(defmulti compile-selector
  "Turns a selector into a function of one node. Dispatches on the `type`
  of the selector."
  (fn [selector] (type selector)))
57
;; Anything that is already an IFn is used as the selector unchanged.
(defmethod compile-selector clojure.lang.IFn [selector-fn]
  selector-fn)
60
;; String selectors are classified by their first character:
;;   #  -> id selector
;;   .  -> class selector
;;   anything else -> element (tag name) selector
;; The full string, sigil included, is passed on to the chosen selector.
(defmethod compile-selector String [s]
  (let [sigil (.charAt s 0)]
    (cond
      (= \# sigil) (fn [node] (id-sel node s))
      (= \. sigil) (fn [node] (class-sel node s))
      :else        (fn [node] (element-sel node s)))))
66
bca48f2 @youngnh added text-selector, calls getTextContent
authored
(defn text-sel
  "Returns a one-element list containing the result of getTextContent on
  `node`."
  [node]
  (-> node
      .getTextContent
      list))
69
(defn flip
  "Returns a function that calls `f` with its arguments in reverse order."
  [f]
  (comp (partial apply f) reverse list))
73
16a0c3c @youngnh selectors operate on single nodes
authored
(defn $
  "Applies `selectors` to `node` from left to right.

  Each selector is first passed through compile-selector. The working set
  starts as just `node`; every compiled selector is then mapcat'ed over the
  current set to produce the next one. With no selectors the result is the
  initial one-element vector."
  [node & selectors]
  (let [steps (map compile-selector selectors)]
    (reduce (fn [nodes step] (mapcat step nodes))
            [node]
            steps)))
Something went wrong with that request. Please try again.