/
xml_query_front.rb
315 lines (307 loc) · 9.31 KB
/
xml_query_front.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# -*- coding: utf-8 -*-
module Handsoap
#
# A simple frontend for parsing XML documents and querying them with XPath.
#
# This provides a unified interface for multiple xpath-capable dom-parsers,
# allowing seamless switching between the underlying implementations.
#
# A document is loaded using the function Handsoap::XmlQueryFront.parse_string, passing
# the xml source string and a driver, which can (currently) be one of:
#
# :rexml
# :nokogiri
# :libxml
#
# The resulting object is a wrapper, of the type Handsoap::XmlQueryFront::BaseDriver.
#
module XmlQueryFront
# Signals that an XML source string could not be turned into a document.
class ParseError < RuntimeError
end
# Loads requirements for a driver.
#
# This function is implicitly called by +parse_string+.
#
# +driver+ is one of :rexml, :nokogiri or :libxml.
#
# Returns +driver+ unchanged. Raises a RuntimeError for an unknown driver or
# for a Nokogiri older than 1.3.0; a missing library surfaces as the
# LoadError raised by +require+.
def self.load_driver!(driver)
  case driver
  when :rexml
    require 'rexml/document'
  when :nokogiri
    require 'nokogiri'
    # Gem.loaded_specs has no entry when Nokogiri was put on the load path
    # without being activated through RubyGems; fall back to the version
    # constant the library itself defines.
    spec = Gem.loaded_specs['nokogiri']
    version = spec ? spec.version : Gem::Version.new(Nokogiri::VERSION)
    if version < Gem::Version.new('1.3.0')
      raise "Incompatible version of Nokogiri. Please upgrade gem."
    end
  when :libxml
    require 'libxml'
  else
    raise "Unknown driver #{driver}"
  end
  driver
end
# Returns a wrapped XML parser, using the requested driver.
#
# +xml_string+ is the XML source to parse.
#
# +driver+ can be one of the following:
# :rexml
# :nokogiri
# :libxml
#
# Raises ParseError when the source cannot be parsed into a document with a
# root element. An unknown driver is rejected by +load_driver!+.
def self.parse_string(xml_string, driver)
  load_driver!(driver)
  case driver
  when :rexml
    begin
      doc = REXML::Document.new(xml_string)
    rescue REXML::ParseException
      # REXML reports malformed markup with its own exception class; translate
      # it so every driver signals a parse failure the same way.
      raise ParseError.new
    end
    raise ParseError.new if doc.root.nil?
    XmlQueryFront::REXMLDriver.new(doc)
  when :nokogiri
    doc = Nokogiri::XML(xml_string)
    # Nokogiri collects errors on the document instead of raising.
    raise ParseError.new unless doc && doc.root && doc.errors.empty?
    XmlQueryFront::NokogiriDriver.new(doc)
  when :libxml
    begin
      # Install libxml's quiet handler before parsing.
      LibXML::XML::Error.set_handler(&LibXML::XML::Error::QUIET_HANDLER)
      XmlQueryFront::LibXMLDriver.new(LibXML::XML::Parser.string(xml_string).parse)
    rescue ArgumentError, LibXML::XML::Error
      raise ParseError.new
    end
  end
end
# NodeSelection is a wrapper around Array, that implicitly delegates BaseDriver methods to the first element.
#
# It makes mapping code prettier, since you often need to access the first element of a selection.
#
# On an empty selection the value accessors return nil, and the query methods
# (+xpath+ and +/+) return an empty NodeSelection so that chained lookups
# don't fail on nil.
class NodeSelection < Array
  # Integer value of the first node, or nil if the selection is empty.
  def to_i
    first.to_i if any?
  end

  # Float value of the first node, or nil if the selection is empty.
  def to_f
    first.to_f if any?
  end

  # Boolean value of the first node, or nil if the selection is empty.
  def to_boolean
    first.to_boolean if any?
  end

  # Time value of the first node, or nil if the selection is empty.
  def to_date
    first.to_date if any?
  end

  # Text of the first node, or nil if the selection is empty.
  def to_s
    first.to_s if any?
  end

  # Node name of the first node, or nil if the selection is empty.
  def node_name
    first.node_name if any?
  end

  # Queries relative to the first node.
  #
  # +ns+ is passed through to the node's own +xpath+.
  def xpath(expression, ns = nil)
    # Nothing to query against: answer with an empty selection rather than
    # calling a method on nil.
    return NodeSelection.new unless any?
    first.xpath(expression, ns)
  end

  # alias of +xpath+, without explicit namespaces
  def /(expression)
    return NodeSelection.new unless any?
    first.xpath(expression)
  end

  # Outer XML of the first node, or nil if the selection is empty.
  def to_xml
    first.to_xml if any?
  end

  # Unformatted outer XML of the first node, or nil if the selection is empty.
  def to_raw
    first.to_raw if any?
  end
end
# Wraps the underlying (native) xml driver, and provides a uniform interface.
#
# Including classes are expected to implement +to_s+, +node_name+, +xpath+,
# +to_xml+, +to_raw+ and +[]+; the versions defined here raise
# NotImplementedError.
module BaseDriver
  # +element+ is the native node to wrap.
  #
  # +namespaces+ is a Hash of prefix => namespace, kept for later queries.
  def initialize(element, namespaces = {})
    @element = element
    @namespaces = namespaces
  end

  # Registers a prefix to refer to a namespace.
  #
  # You can either register a namespace with this function or pass it explicitly to the +xpath+ method.
  def add_namespace(prefix, uri)
    @namespaces[prefix] = uri
  end

  # Checks that an xpath-query doesn't refer to any undefined prefixes in +ns+
  #
  # Raises a RuntimeError naming the first prefix that has no entry in +ns+.
  def assert_prefixes!(expression, ns)
    # String literals may legitimately contain colons (URIs, QName values), so
    # they are blanked out before looking for prefixes.
    without_literals = expression.gsub(/"[^"]*"|'[^']*'/, '""')
    # The lookahead leaves the text after the colon unconsumed, so every step
    # of "a:b/c:d" is inspected; it also skips the "::" of an axis specifier.
    without_literals.scan(/([a-zA-Z_][a-zA-Z0-9_.-]*):(?=[^:])/).flatten.each do |prefix|
      raise "Undefined prefix '#{prefix}'" if ns[prefix].nil?
    end
  end

  # Returns the value of the element as an integer, or nil if +to_s+ is nil.
  #
  # See +to_s+
  def to_i
    t = self.to_s
    return if t.nil?
    t.to_i
  end

  # Returns the value of the element as a float, or nil if +to_s+ is nil.
  #
  # See +to_s+
  def to_f
    t = self.to_s
    return if t.nil?
    t.to_f
  end

  # Returns the value of the element as a boolean, or nil if +to_s+ is nil.
  #
  # Only the text "true" (in any letter case) counts as true.
  #
  # See +to_s+
  def to_boolean
    t = self.to_s
    return if t.nil?
    t.downcase == 'true'
  end

  # Returns the value of the element as a ruby Time object, or nil if +to_s+ is nil.
  #
  # The text is parsed with Time.iso8601.
  #
  # See +to_s+
  def to_date
    t = self.to_s
    return if t.nil?
    # Time.iso8601 is provided by the 'time' standard library, not by core Time.
    require 'time'
    Time.iso8601(t)
  end

  # Returns the inner text content of this element, or the value (if it's an attr or textnode).
  #
  # The output is a UTF-8 encoded string, without xml-entities.
  def to_s
    raise NotImplementedError.new
  end

  # Returns the underlying native element.
  #
  # You shouldn't need to use this, since doing so would void portability.
  def native_element
    @element
  end

  # Returns the node name of the current element.
  def node_name
    raise NotImplementedError.new
  end

  # Queries the document with XPath, relative to the current element.
  #
  # +ns+ Should be a Hash of prefix => namespace
  #
  # Returns a +NodeSelection+
  #
  # See add_namespace
  def xpath(expression, ns = nil)
    raise NotImplementedError.new
  end

  # Returns the outer XML for this element.
  def to_xml
    raise NotImplementedError.new
  end

  # Returns the outer XML for this element, preserving the original formatting.
  def to_raw
    raise NotImplementedError.new
  end

  # alias of +xpath+
  def /(expression)
    self.xpath(expression)
  end

  # Returns the attribute value of the underlying element.
  #
  # Shortcut for:
  #
  # (node/"@attribute_name").to_s
  def [](attribute_name)
    raise NotImplementedError.new
  end
end
# Driver for +libxml+.
#
# http://libxml.rubyforge.org/
class LibXMLDriver
  include BaseDriver

  # Name of the wrapped native node.
  def node_name
    @element.name
  end

  # Evaluates +expression+ relative to the wrapped node.
  #
  # +ns+ (prefix => namespace) is merged over the registered namespaces, and
  # the merged set is handed on to every node in the returned NodeSelection.
  def xpath(expression, ns = nil)
    scope = @namespaces.merge(ns.nil? ? {} : ns)
    assert_prefixes!(expression, scope)
    # The native find call is given the namespaces as "prefix:uri" strings.
    declarations = scope.map { |prefix, uri| "#{prefix}:#{uri}" }
    matches = @element.find(expression, declarations).to_a
    NodeSelection.new(matches.map { |native| LibXMLDriver.new(native, scope) })
  end

  # Attribute lookup on the wrapped node; +attribute_name+ must be a String.
  def [](attribute_name)
    raise ArgumentError.new unless String === attribute_name
    @element[attribute_name]
  end

  # Outer XML, serialized with the native :indent option switched on.
  def to_xml
    @element.to_s(:indent => true)
  end

  # Outer XML, serialized with the native :indent option switched off.
  def to_raw
    @element.to_s(:indent => false)
  end

  # Attribute value for attribute nodes, node content for everything else.
  def to_s
    @element.kind_of?(LibXML::XML::Attr) ? @element.value : @element.content
  end
end
# Driver for +REXML+
#
# http://www.germane-software.com/software/rexml/
class REXMLDriver
  include BaseDriver

  # Name of the wrapped native node.
  def node_name
    @element.name
  end

  # Evaluates +expression+ relative to the wrapped node.
  #
  # +ns+ (prefix => namespace) is merged over the registered namespaces, and
  # the merged set is handed on to every node in the returned NodeSelection.
  def xpath(expression, ns = nil)
    ns = {} if ns.nil?
    ns = @namespaces.merge(ns)
    assert_prefixes!(expression, ns)
    NodeSelection.new(REXML::XPath.match(@element, expression, ns).map { |node| REXMLDriver.new(node, ns) })
  end

  # Attribute lookup on the wrapped node; +attribute_name+ must be a String.
  def [](attribute_name)
    raise ArgumentError.new unless attribute_name.kind_of? String
    @element.attributes[attribute_name]
  end

  # Outer XML, written through REXML's pretty formatter.
  def to_xml
    # Loaded lazily: the formatter is only needed for pretty output.
    require 'rexml/formatters/pretty'
    formatter = REXML::Formatters::Pretty.new
    out = String.new
    formatter.write(@element, out)
    # patch for REXML's broken formatting
    out.gsub(/>\n\s+([^<]+)\n\s+<\//, ">\\1</")
  end

  # Outer XML exactly as REXML serializes the node.
  def to_raw
    @element.to_s
  end

  # Value of an attribute or text node, inner text of anything else.
  def to_s
    case @element
    when REXML::Attribute, REXML::Text
      # Attribute and text (including CDATA) nodes expose their content
      # through +value+; +text+ is only called on the remaining node kinds.
      @element.value
    else
      @element.text
    end
  end
end
# Driver for +Nokogiri+
#
# http://nokogiri.rubyforge.org/nokogiri/
class NokogiriDriver
  include BaseDriver

  # Name of the wrapped native node.
  def node_name
    @element.name
  end

  # Evaluates +expression+ relative to the wrapped node.
  #
  # +ns+ (prefix => namespace) is merged over the registered namespaces, and
  # the merged set is handed on to every node in the returned NodeSelection.
  def xpath(expression, ns = nil)
    ns = {} if ns.nil?
    ns = @namespaces.merge(ns)
    assert_prefixes!(expression, ns)
    NodeSelection.new(@element.xpath(expression, ns).map { |node| NokogiriDriver.new(node, ns) })
  end

  # Attribute lookup on the wrapped node; +attribute_name+ must be a String.
  def [](attribute_name)
    raise ArgumentError.new unless attribute_name.kind_of? String
    @element[attribute_name]
  end

  # Outer XML, serialized as UTF-8.
  def to_xml
    @element.serialize(:encoding => 'UTF-8')
  end

  # Outer XML, serialized as UTF-8 with the AS_XML save option.
  def to_raw
    @element.serialize(:encoding => 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
  end

  # Returns the value of an attribute, the content of a text/CDATA node, or
  # the content of the first child of any other node (nil if it has none).
  #
  # Only the first child is considered; later children are ignored.
  def to_s
    if @element.kind_of?(Nokogiri::XML::Text) || @element.kind_of?(Nokogiri::XML::CDATA)
      element = @element
    elsif @element.kind_of?(Nokogiri::XML::Attr)
      return @element.value
    else
      element = @element.children.first
    end
    return if element.nil?
    # This looks messy because it is .. Nokogiri's interface is in a flux
    serialized = element.serialize(:encoding => 'UTF-8')
    if element.kind_of?(Nokogiri::XML::CDATA)
      # Strip only the wrapper at the very start and end, so a "<![CDATA["
      # that begins a later line of the content is left alone.
      serialized.sub(/\A<!\[CDATA\[/, "").sub(/\]\]>\Z/, "")
    else
      # Decode the predefined XML entities; "&amp;" goes last so that text
      # such as "&amp;lt;" is not decoded twice.
      serialized.gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&apos;', "'").gsub('&amp;', '&')
    end
  end
end
end
end