/
XPath.class.php
6265 lines (5717 loc) · 270 KB
/
XPath.class.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<?php
/**
* Php.XPath
*
* +======================================================================================================+
* | A php class for searching an XML document using XPath, and making modifications using a DOM
* | style API. Does not require the DOM XML PHP library.
* |
* +======================================================================================================+
* | What Is XPath:
* | --------------
* | - "What SQL is for a relational database, XPath is for an XML document." -- Sam Blum
* | - "The primary purpose of XPath is to address parts of an XML document. In support of this
* | primary purpose, it also provides basic facilities for manipulating it." -- W3C
* |
* | XPath in action and a very nice intro is under:
* | http://www.zvon.org/xxl/XPathTutorial/General/examples.html
* | Specs Can be found under:
* | http://www.w3.org/TR/xpath W3C XPath Recommendation
* | http://www.w3.org/TR/xpath20 W3C XPath Recommendation
* |
* | NOTE: Most of the XPath-spec has been realized, but not all. Usually this should not be
* | a problem as the missing part is either rarely used or it's simpler to do with PHP itself.
* +------------------------------------------------------------------------------------------------------+
* | Requires PHP version 4.0.5 and up
* +------------------------------------------------------------------------------------------------------+
* | Main Active Authors:
* | --------------------
* | Nigel Swinson <nigelswinson@users.sourceforge.net>
* | Started around 2001-07, saved phpxml from near death and renamed to Php.XPath
* | Restructured XPath code to stay in line with XPath spec.
* | Sam Blum <bs_php@infeer.com>
* | Started around 2001-09 1st major restruct (V2.0) and testbench initiator.
* | 2nd (V3.0) major rewrite in 2002-02
* | Daniel Allen <bigredlinux@yahoo.com>
* | Started around 2001-10 working to make Php.XPath adhere to specs
* | Main Former Author: Michael P. Mehl <mpm@phpxml.org>
* | Initial creator of V 1.0. Stopped activities around 2001-03
* +------------------------------------------------------------------------------------------------------+
* | Code Structure:
* | --------------_
* | The class is split into 3 main objects. To keep usability easy all 3
* | objects are in this file (but may be split into 3 files in future).
* | +-------------+
* | | XPathBase | XPathBase holds general and debugging functions.
* | +------+------+
* | v
* | +-------------+ XPathEngine is the implementation of the W3C XPath spec. It contains the
* | | XPathEngine | XML-import (parser), -export and can handle xPathQueries. It's a fully
* | +------+------+ functional class but has no functions to modify the XML-document (see following).
* | v
* | +-------------+
* | | XPath | XPath extends the functionality with actions to modify the XML-document.
* | +-------------+ We tried to implement a DOM - like interface.
* +------------------------------------------------------------------------------------------------------+
* | Usage:
* | ------
* | Scroll to the end of this php file and you will find a short sample code to get you started
* +------------------------------------------------------------------------------------------------------+
* | Glossary:
* | ---------
* | To understand how to use the functions and to pass the right parameters, read following:
* |
* | Document: (full node tree, XML-tree)
* | After an XML-source has been imported and parsed, it's stored as a tree of nodes sometimes
* | referred to as 'document'.
* |
* | AbsoluteXPath: (xPath, xPathSet)
* | An absolute XPath is a string. It 'points' to *one* node in the XML-document. We use the
* | term 'absolute' to emphasise that it is not an xPath-query (see xPathQuery). A valid xPath
* | has the form like '/AAA[1]/BBB[2]/CCC[1]'. Usually functions that require a node (see Node)
* | will also accept an abs. XPath.
* |
* | Node: (node, nodeSet, node-tree)
* | Some functions require or return a node (or a whole node-tree). Nodes are only used with the
* | XPath-interface and have an internal structure. Every node in a XML document has a unique
* | corresponding abs. xPath. That's why public functions that accept a node, will usually also
* | accept an abs. xPath (a string) 'pointing' to an existing node (see AbsoluteXPath).
* |
* | XPathQuery: (xquery, query)
* | A xPath-query is a string that is matched against the XML-document. The result of the match
* | is a xPathSet (vector of xPath's). It's always possible to pass a single absoluteXPath
* | instead of a xPath-query. A valid xPathQuery could look like this:
* | '//XXX/*[contains(., "foo")]/..' (See the link in 'What Is XPath' to learn more).
* |
* |
* +------------------------------------------------------------------------------------------------------+
* | Internals:
* | ----------
* | - The Node Tree
* | -------------
* | A central role of the package is how the XML-data is stored. The whole data is in a node-tree.
* | A node can be seen as the equivalent to a tag in the XML source with some extra info.
* | For instance the following XML
* | <AAA foo="x">***<BBB/><CCC/>**<BBB/>*</AAA>
* | Would produce the following node-tree:
* | 'super-root' <-- $nodeRoot (Very handy)
* | |
* | 'depth' 0 AAA[1] <-- top node. The 'textParts' of this node would be
* | / | \ 'textParts' => array('***','','**','*')
* | 'depth' 1 BBB[1] CCC[1] BBB[2] (NOTE: Is always size of child nodes+1)
* | - The Node
* | --------
* | The node itself is a structure designed mainly to be used in connection with the interface of PHP.XPath.
* | That means it's possible for functions to return a sub-node-tree that can be used as input of an other
* | PHP.XPath function.
* |
* | The main structure of a node is:
* | $node = array(
* | 'name' => '', # The tag name. E.g. In <FOO bar="aaa"/> it would be 'FOO'
* | 'attributes' => array(), # The attributes of the tag E.g. In <FOO bar="aaa"/> it would be array('bar'=>'aaa')
* | 'textParts' => array(), # Array of text parts surrounding the children E.g. <FOO>aa<A>bb<B/>cc</A>dd</FOO> -> array('aa','bb','cc','dd')
* | 'childNodes' => array(), # Array of references (pointers) to child nodes.
* |
* | For optimisation reasons some additional data is stored in the node too:
* | 'parentNode' => NULL # Reference (pointer) to the parent node (or NULL if it's 'super root')
* | 'depth' => 0, # The tag depth (or tree level) starting with the root tag at 0.
* | 'pos' => 0, # Is the zero-based position this node has in the parent's 'childNodes'-list.
* | 'contextPos' => 1, # Is the one-based position this node has by counting the siblings tags (tags with same name)
* | 'xpath' => '' # Is the abs. XPath to this node.
* | 'generated_id'=> '' # The id returned for this node by generate-id() (attribute and text nodes not supported)
* |
* | - The NodeIndex
* | -------------
* | Every node in the tree has an absolute XPath. E.g '/AAA[1]/BBB[2]' the $nodeIndex is a hash array
* | to all the nodes in the node-tree. The key used is the absolute XPath (a string).
* |
* +------------------------------------------------------------------------------------------------------+
* | License:
* | --------
* | The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License");
* | you may not use this file except in compliance with the License. You may obtain a copy of the
* | License at http://www.mozilla.org/MPL/
* |
* | Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY
* | OF ANY KIND, either express or implied. See the License for the specific language governing
* | rights and limitations under the License.
* |
* | The Original Code is <phpXML/>.
* |
* | The Initial Developer of the Original Code is Michael P. Mehl. Portions created by Michael
* | P. Mehl are Copyright (C) 2001 Michael P. Mehl. All Rights Reserved.
* |
* | Contributor(s): N.Swinson / S.Blum / D.Allen
* |
* | Alternatively, the contents of this file may be used under the terms of either of the GNU
* | General Public License Version 2 or later (the "GPL"), or the GNU Lesser General Public
* | License Version 2.1 or later (the "LGPL"), in which case the provisions of the GPL or the
* | LGPL License are applicable instead of those above. If you wish to allow use of your version
* | of this file only under the terms of the GPL or the LGPL License and not to allow others to
* | use your version of this file under the MPL, indicate your decision by deleting the
* | provisions above and replace them with the notice and other provisions required by the
* | GPL or the LGPL License. If you do not delete the provisions above, a recipient may use
* | your version of this file under either the MPL, the GPL or the LGPL License.
* |
* +======================================================================================================+
*
* @author S.Blum / N.Swinson / D.Allen / (P.Mehl)
* @link http://sourceforge.net/projects/phpxpath/
* @version 3.5
* @CVS $Id: XPath.class.php,v 1.148 2004/08/13 11:47:36 nigelswinson Exp $
*/
// Include guard, protects file being included twice.
// The guard constant is named after the upper-cased path of this file, so both the
// defined() check and the define() call always use the very same spelling.
$ConstantName = 'INCLUDED_'.strtoupper(__FILE__);
if (defined($ConstantName)) return;
// The constant is deliberately NOT declared case-insensitive: that option of define() is
// deprecated (PHP 7.3) and ignored with a warning (PHP 8), and it is not needed here
// because the name is already normalised with strtoupper() above.
define($ConstantName, 1);
/************************************************************************************************
* ===============================================================================================
* X P a t h B a s e - Class
* ===============================================================================================
************************************************************************************************/
class XPathBase {
  // Text of the last error recorded by _setLastError(); read it back with getLastError().
  var $_lastError;

  // Settings bag. The constructor stores 'verboseLevel' and 'OS_supports_flock' in it.
  // Declared explicitly so that those writes do not create an undeclared (dynamic) property.
  var $properties = array();

  // As debugging of the xml parse is spread across several functions, we need to make this a member.
  var $bDebugXmlParse = FALSE;

  // Used to help navigate through the begin/end debug calls
  var $iDebugNextLinkNumber = 1;
  var $aDebugOpenLinks = array();

  // List of function names to debug. Every entry is commented out here and nothing inside
  // this class reads the list (NOTE(review): presumably consulted by subclasses -- confirm).
  var $aDebugFunctions = array(
    //'_evaluateStep',
    //'_evaluatePrimaryExpr',
    //'_evaluateExpr',
    //'_evaluateStep',
    //'_checkPredicates',
    //'_evaluateFunction',
    //'_evaluateOperator',
    //'_evaluatePathExpr',
  );

  /**
   * Constructor
   *
   * Sets the default verbose level (1) and works out whether file locking should be
   * considered available, storing the answer in properties['OS_supports_flock'].
   *
   * Declared as __construct() because a method that merely shares the class name is no
   * longer treated as a constructor by PHP 8. The old name is kept below as an alias.
   */
  function __construct() {
    # $this->bDebugXmlParse = TRUE;
    $this->properties['verboseLevel'] = 1; // 0=silent, 1 and above produce verbose output (an echo to screen).

    if (!isSet($_ENV)) { // Note: $_ENV introduced in 4.1.0. In earlier versions, use $HTTP_ENV_VARS.
      $_ENV = isSet($GLOBALS['HTTP_ENV_VARS']) ? $GLOBALS['HTTP_ENV_VARS'] : array();
    }

    // Windows 95/98 do not support file locking. Detecting OS (Operation System) and setting the
    // properties['OS_supports_flock'] to FALSE if win 95/98 is detected.
    // This will suppress the file locking error reported from win 98 users when exportToFile() is called.
    // May have to add more OS's to the list in future (Macs?).
    // ### Note that it's only the FAT and NFS file systems that are really a problem. NTFS and
    //     the latest php libs do support flock()
    $_ENV['OS'] = isSet($_ENV['OS']) ? $_ENV['OS'] : 'Unknown OS';
    $isFlocklessOrUnknownOs = in_array($_ENV['OS'], array('Windows_95', 'Windows_98', 'Unknown OS'), TRUE);

    // A Darwin server signature (Mac OS X compatible environment) overrides the list above.
    $isDarwinServer = !empty($_SERVER['SERVER_SOFTWARE'])
                      && preg_match('/Darwin/', $_SERVER['SERVER_SOFTWARE']);

    $this->properties['OS_supports_flock'] = (!$isFlocklessOrUnknownOs || $isDarwinServer);
  }

  /**
   * Old style (PHP 4) constructor name.
   *
   * Kept so that code calling XPathBase() explicitly (for instance parent::XPathBase()
   * from a subclass) keeps working. It runs exactly the initialisation of __construct().
   * The call is class-qualified on purpose so that it can never dispatch to a
   * __construct() defined by a subclass.
   */
  function XPathBase() {
    XPathBase::__construct();
  }

  /**
   * Resets the object so it's able to take a new xml string/file
   *
   * Constructing objects is slow. If you can, reuse ones that you have used already
   * by using this reset() function. In this base class only the last error is cleared.
   */
  function reset() {
    $this->_lastError = '';
  }

  //-----------------------------------------------------------------------------------------
  // XPathBase                    ------  Helpers  ------
  //-----------------------------------------------------------------------------------------

  /**
   * This method checks the right amount and match of brackets
   *
   * Only '(' ')' '[' and ']' are looked at. Every closing bracket has to close the most
   * recently opened bracket of the same kind, and no bracket may be left open at the end.
   *
   * @param     $term (string) String in which is checked.
   * @return          (bool)   TRUE: OK / FALSE: KO
   */
  function _bracketsCheck($term) {
    // For every closing bracket: the opening bracket it has to pair with.
    $partnerOf = array(')' => '(', ']' => '[');
    $openStack = array();
    $leng = strlen($term);
    for ($i = 0; $i < $leng; $i++) {
      $char = $term[$i];
      if ($char === '(' || $char === '[') {
        $openStack[] = $char;
      } elseif (isSet($partnerOf[$char])) {
        // A close without any open bracket is a miscount.
        if (count($openStack) == 0) return FALSE;
        // A close of the wrong kind is a mismatch.
        if (array_pop($openStack) !== $partnerOf[$char]) return FALSE;
      }
    }
    // Anything still open is a miscount too.
    return (count($openStack) == 0);
  }

  /**
   * Looks for a string within another string -- BUT the search-string must be located *outside* of any brackets.
   *
   * This method looks for a string within another string. Brackets in the
   * string the method is looking through will be respected, which means that
   * only if the string the method is looking for is located outside of
   * brackets, the search will be successful.
   *
   * Positions holding an opening bracket are never tested. The position of the closing
   * bracket that brings the nesting back to zero is tested.
   *
   * @param     $term       (string) String in which the search shall take place.
   * @param     $expression (string) String that should be searched.
   * @return                (int)    This method returns -1 if no string was found,
   *                                 otherwise the offset at which the string was found.
   */
  function _searchString($term, $expression) {
    $term       = (string)$term;
    $expression = (string)$expression;
    $bracketCounter = 0; // Record where we are in the brackets.
    $leng     = strlen($term);
    $exprLeng = strlen($expression);
    for ($i = 0; $i < $leng; $i++) {
      $char = $term[$i];
      if ($char == '(' || $char == '[') {
        $bracketCounter++;
        continue;
      }
      if ($char == ')' || $char == ']') {
        $bracketCounter--;
      }
      // Identity comparison on purpose: with '==' two numeric looking strings such as
      // '010' and '1e1' would be compared as numbers and wrongly reported as a match.
      if ($bracketCounter == 0 && substr($term, $i, $exprLeng) === $expression) return $i;
    }
    // Nothing was found.
    return (-1);
  }

  /**
   * Split a string by a separator-string -- BUT the separator-string must be located *outside* of any brackets.
   *
   * Returns an array of strings, each of which is a substring of string formed
   * by splitting it on boundaries formed by the string separator. Separators that sit
   * inside '(...)' or '[...]' do not split.
   *
   * Note that it doesn't make sense for $separator to itself contain (,),[ or ].
   * There is also an assumption that the brackets in $term are balanced; a dodgy
   * string is expected to be detected elsewhere (see _bracketsCheck()).
   *
   * @param     $separator  (string) String that should be searched.
   * @param     $term       (string) String in which the search shall take place.
   * @return                (array)  The pieces. An array with the single entry $term when the
   *                                 separator is empty or does not occur at all.
   */
  function _bracketExplode($separator, $term) {
    $separator = (string)$separator;
    $haystack  = (string)$term;
    $sepLeng   = strlen($separator);

    // Nothing to split on, so end now.
    if ($sepLeng == 0 || strpos($haystack, $separator) === FALSE) {
      return array($term);
    }

    $resultArr      = array();
    $bracketCounter = 0; // Record where we are in the brackets.
    $pieceStart     = 0; // Offset at which the piece currently being collected begins.
    $leng           = strlen($haystack);
    $i              = 0;
    while ($i < $leng) {
      $char = $haystack[$i];
      if ($char == '(' || $char == '[') {
        $bracketCounter++;
        $i++;
        continue;
      }
      if ($char == ')' || $char == ']') {
        $bracketCounter--;
      }
      // If no brackets surround us check for the separator. Identity comparison on purpose:
      // '==' would treat numeric looking strings ('010' / '1e1') as equal numbers.
      if ($bracketCounter == 0 && substr($haystack, $i, $sepLeng) === $separator) {
        $resultArr[] = (string)substr($haystack, $pieceStart, $i - $pieceStart);
        $i += $sepLeng;
        $pieceStart = $i;
        continue;
      }
      $i++;
    }
    // Whatever follows the last separator is the final piece (possibly an empty string).
    $resultArr[] = (string)substr($haystack, $pieceStart);

    return $resultArr;
  }

  /**
   * Split a string at its groups, ie bracketed expressions
   *
   * Returns an array of strings, when concatenated together would produce the original
   * string.  ie a(b)cde(f)(g) would map to:
   * array ('a', '(b)', 'cde', '(f)', '(g)')
   *
   * Nested groups stay inside their outermost group, so x[[a]] maps to array('x', '[[a]]').
   * When the string is malformed (a close before the first open, or a group that is
   * never closed) the unprocessed rest is appended as one final entry.
   *
   * The code advances by exactly one character over $open and $close, so both are
   * expected to be single characters.
   *
   * @param     $string  (string) The string to process
   * @param     $open    (string) The substring for the open of a group
   * @param     $close   (string) The substring for the close of a group
   * @return             (array)  The parsed string, see above
   */
  function _getEndGroups($string, $open='[', $close=']') {
    // We need both an open and a close tag, and something to look through.
    // Comparisons are made against '' rather than with empty(), because empty() also
    // regards the perfectly valid string '0' as "nothing".
    if ((string)$open === '' || (string)$close === '' || (string)$string === '') {
      return array($string);
    }

    $resultArr = array();
    $rest = (string)$string;
    while ($rest !== '') {
      // Now determine the first open and close bracket.
      $openPos  = strpos($rest, $open);
      $closePos = strpos($rest, $close);
      if ($openPos === FALSE || $closePos === FALSE) {
        // Oh, no more groups to be found then.  Quit
        $resultArr[] = $rest;
        break;
      }

      // Sanity check
      if ($openPos > $closePos) {
        // Malformed string, dump the rest and quit.
        $resultArr[] = $rest;
        break;
      }

      // Get prefix string part before the first bracket.
      // This is the first string that will go in our output
      $preStr = (string)substr($rest, 0, $openPos);
      if ($preStr !== '') {
        $resultArr[] = $preStr;
      }

      // Skip over what we've processed, including the open char
      $rest = (string)substr($rest, $openPos + 1);

      // Find the next open char and adjust our close char
      $closePos -= $openPos + 1;
      $openPos = strpos($rest, $open);

      // While we have found nesting...
      // Positions are tested against FALSE explicitly: offset 0 is a valid position (a
      // nested group that starts right after the open char) and must not end the search.
      while ($openPos !== FALSE && $closePos !== FALSE && ($closePos > $openPos)) {
        // Find another close pos after the one we are looking at
        $closePos = strpos($rest, $close, $closePos + 1);
        // And skip our open
        $openPos = strpos($rest, $open, $openPos + 1);
      }

      // If we now have a close pos, then it's the end of the group.
      if ($closePos === FALSE) {
        // We didn't... so bail dumping what was left
        $resultArr[] = $open.$rest;
        break;
      }

      // We did, so we can extract the group
      $resultArr[] = $open.substr($rest, 0, $closePos + 1);
      // Skip what we have processed
      $rest = (string)substr($rest, $closePos + 1);
    }

    // Return the results that we found. May be an array with 1 entry.
    return $resultArr;
  }

  /**
   * Retrieves a substring before a delimiter.
   *
   * This method retrieves everything from a string before a given delimiter,
   * not including the delimiter. When the delimiter is not found the whole string
   * is returned.
   *
   * @param     $string     (string) String, from which the substring should be extracted.
   *                                 Taken by reference, but never modified.
   * @param     $delimiter  (string) String containing the delimiter to use.
   * @param     $offset     (int)    Where to start looking for the delimiter. Negative values
   *                                 are treated as 0.
   * @return                (string) Substring from the original string before the delimiter.
   * @see       _afterstr()
   */
  function _prestr(&$string, $delimiter, $offset=0) {
    if ($offset < 0) $offset = 0;
    // Looking beyond the end of the string cannot find anything (and strpos() would complain).
    if ($offset > strlen($string)) return $string;
    $pos = strpos($string, $delimiter, $offset);
    if ($pos === FALSE) return $string;
    return substr($string, 0, $pos);
  }

  /**
   * Retrieves a substring after a delimiter.
   *
   * This method retrieves everything from a string after a given delimiter,
   * not including the delimiter. When the delimiter is not found there is nothing
   * "after" it, so an empty string is returned.
   *
   * @param     $string     (string) String, from which the substring should be extracted.
   * @param     $delimiter  (string) String containing the delimiter to use.
   * @param     $offset     (int)    Where to start looking for the delimiter. Negative values
   *                                 are treated as 0.
   * @return                (string) Substring from the original string after the delimiter.
   * @see       _prestr()
   */
  function _afterstr($string, $delimiter, $offset=0) {
    if ($offset < 0) $offset = 0;
    // Looking beyond the end of the string cannot find anything (and strpos() would complain).
    if ($offset > strlen($string)) return '';
    $pos = strpos($string, $delimiter, $offset);
    // Must be checked: FALSE would otherwise silently count as position 0 in the sum below.
    if ($pos === FALSE) return '';
    return (string)substr($string, $pos + strlen($delimiter));
  }

  //-----------------------------------------------------------------------------------------
  // XPathBase                ------  Debug Stuff  ------
  //-----------------------------------------------------------------------------------------

  /**
   * Alter the verbose (error) level reporting.
   *
   * Pass an int. >0 to turn on, 0 to turn off.  The higher the number, the
   * higher the level of verbosity.  By default, the class has a verbose level
   * of 1.
   *
   * TRUE is stored as level 1 and FALSE as level 0. Negative numbers and values that
   * are neither boolean nor numeric leave the current level untouched.
   *
   * @param $levelOfVerbosity (int) default is 1 = on
   */
  function setVerbose($levelOfVerbosity = 1) {
    $level = -1;
    if ($levelOfVerbosity === TRUE) {
      $level = 1;
    } elseif ($levelOfVerbosity === FALSE) {
      $level = 0;
    } elseif (is_numeric($levelOfVerbosity)) {
      $level = $levelOfVerbosity;
    }
    // Store the normalised level, not the raw argument.
    if ($level >= 0) $this->properties['verboseLevel'] = $level;
  }

  /**
   * Returns the last occurred error message.
   *
   * @access public
   * @return string (may be empty if there was no error at all)
   * @see    _setLastError(), _lastError
   */
  function getLastError() {
    return $this->_lastError;
  }

  /**
   * Creates a textual error message and sets it.
   *
   * example: 'XPath error in THIS_FILE_NAME:LINE. Message: YOUR_MESSAGE';
   *
   * I don't think the message should include any markup because not everyone wants to debug
   * into the browser window.
   *
   * You should call _displayError() rather than _setLastError() if you would like the message,
   * dependent on their verbose settings, echoed to the screen.
   *
   * @param $message (string) a textual error message default is ''
   * @param $line    (int)    the line number where the error occurred, use __LINE__
   * @param $file    (string) the file where the error occurred, use __FILE__. Only its
   *                          base name ends up in the message.
   * @see getLastError()
   */
  function _setLastError($message='', $line='-', $file='-') {
    $this->_lastError = 'XPath error in ' . basename($file) . ':' . $line . '. Message: ' . $message;
  }

  /**
   * Displays an error message.
   *
   * This method displays an error message depending on the users verbose settings
   * and sets the last error message. When $terminate is set the message is echoed
   * regardless of the verbose level.
   *
   * It also possibly stops the execution of the script.
   * ### Terminate should not be allowed --fab.  Should it??  N.S.
   *
   * The message is echoed as given, it is not HTML-escaped here.
   *
   * @param $message    (string)  Error message to be displayed.
   * @param $lineNumber (int)     line number given by __LINE__
   * @param $file       (string)  file name given by __FILE__
   * @param $terminate  (bool)    (default TRUE) End the execution of this script.
   */
  function _displayError($message, $lineNumber='-', $file='-', $terminate=TRUE) {
    // Build the error message.
    $err = '<b>XPath error in '.basename($file).':'.$lineNumber.'</b> '.$message."<br />\n";
    $this->_setLastError($message, $lineNumber, $file);
    // The level may be missing if the constructor never ran; that counts as silent.
    $isVerbose = isSet($this->properties['verboseLevel']) && ($this->properties['verboseLevel'] > 0);
    if ($isVerbose OR $terminate) echo $err;
    // End the execution of this script.
    if ($terminate) exit;
  }

  /**
   * Displays a diagnostic message
   *
   * This method echoes a message if the verbose level is above 0. Unlike
   * _displayError() it neither records the message nor ends the script.
   *
   * @param $message    (string)  Message to be displayed.
   * @param $lineNumber (int)     line number given by __LINE__
   * @param $file       (string)  file name given by __FILE__
   */
  function _displayMessage($message, $lineNumber='-', $file='-') {
    // Build the message.
    $err = '<b>XPath message from '.basename($file).':'.$lineNumber.'</b> '.$message."<br />\n";
    // The level may be missing if the constructor never ran; that counts as silent.
    $isVerbose = isSet($this->properties['verboseLevel']) && ($this->properties['verboseLevel'] > 0);
    if ($isVerbose) echo $err;
  }

  /**
   * Called to begin the debug run of a function.
   *
   * This method starts a <DIV><PRE> tag so that the entry to this function
   * is clear to the debugging user.  Call _closeDebugFunction() at the
   * end of the function to create a clean box round the function call.
   *
   * @author    Nigel Swinson <nigelswinson@users.sourceforge.net>
   * @author    Sam Blum <bs_php@infeer.com>
   * @param     $functionName (string) the name of the function we are beginning to debug
   * @return                  (string) the output from the microtime() function, to be handed
   *                                   to _closeDebugFunction().
   * @see       _closeDebugFunction()
   */
  function _beginDebugFunction($functionName) {
    $fileName = basename(__FILE__);
    // The border colour cycles with every call, so that nested boxes can be told apart.
    static $color = array('green','blue','red','lime','fuchsia', 'aqua');
    static $colIndex = -1;
    $colIndex++;
    echo '<div style="clear:both" align="left"> ';
    echo '<pre STYLE="border:solid thin '. $color[$colIndex % count($color)] . '; padding:5">';
    echo '<a style="float:right;margin:5px" name="'.$this->iDebugNextLinkNumber.'Open" href="#'.$this->iDebugNextLinkNumber.'Close">Function Close '.$this->iDebugNextLinkNumber.'</a>';
    echo "<STRONG>{$fileName} : {$functionName}</STRONG>";
    echo '<hr style="clear:both">';
    // Remember the link number so that the matching close call can link back to it.
    array_push($this->aDebugOpenLinks, $this->iDebugNextLinkNumber);
    $this->iDebugNextLinkNumber++;
    return microtime();
  }

  /**
   * Called to end the debug run of a function.
   *
   * This method ends a <DIV><PRE> block and reports the time since $aStartTime.
   *
   * @author    Nigel Swinson <nigelswinson@users.sourceforge.net>
   * @param     $aStartTime   (string) the time that the function call was started, as
   *                                   returned by _beginDebugFunction().
   * @param     $returnValue  (mixed)  the return value from the function call that
   *                                   we are debugging
   */
  function _closeDebugFunction($aStartTime, $returnValue = "") {
    echo "<hr>";
    $iOpenLinkNumber = array_pop($this->aDebugOpenLinks);
    echo '<a style="float:right" name="'.$iOpenLinkNumber.'Close" href="#'.$iOpenLinkNumber.'Open">Function Open '.$iOpenLinkNumber.'</a>';
    if (isSet($returnValue)) {
      if (is_array($returnValue))
        // print_r() must be told to *return* its output. Otherwise it echoes the array
        // straight away and the concatenation only sees its return value TRUE.
        echo "Return Value: ".htmlspecialchars(print_r($returnValue, TRUE))."\n";
      else if (is_numeric($returnValue))
        echo "Return Value: ".(string)$returnValue."\n";
      else if (is_bool($returnValue))
        echo "Return Value: ".($returnValue ? "TRUE" : "FALSE")."\n";
      else
        echo "Return Value: \"".htmlspecialchars($returnValue)."\"\n";
    }
    $this->_profileFunction($aStartTime, "Function took");
    echo '<br style="clear:both">';
    echo " \n</pre></div>";
  }

  /**
   * Call to return time since start of function for Profiling
   *
   * Echoes the elapsed time in whole milliseconds.
   *
   * @param     $aStartTime   (string) the time that the function call was started, in the
   *                                   "msec sec" format of microtime().
   * @param     $alertString  (string) the string to describe what has just finished happening
   */
  function _profileFunction($aStartTime, $alertString) {
    // Print the time it took to call this function.
    // Both stamps are split into their fraction [0] and whole seconds [1] parts.
    $now   = explode(' ', microtime());
    $last  = explode(' ', $aStartTime);
    $delta = (round( (($now[1] - $last[1]) + ($now[0] - $last[0]))*1000 ));
    echo "\n{$alertString} <strong>{$delta} ms</strong>";
  }

  /**
   * Echo an XPath context for diagnostic purposes
   *
   * @param $context   (array)   An XPath context; its 'nodePath', 'pos' and 'size'
   *                             entries are printed.
   */
  function _printContext($context) {
    echo "{$context['nodePath']}({$context['pos']}/{$context['size']})";
  }

  /**
   * This is a debug helper function. It dumps the node-tree as HTML
   *
   * *QUICK AND DIRTY*. Needs some polishing.
   *
   * Only the outermost call (empty $indent) wraps the dump in <pre> and HTML-escapes it;
   * the recursive calls return plain text.
   *
   * @param $node   (array)   A node
   * @param $indent (string) (optional, default=''). For internal recursive calls.
   * @return        (string)  The dump
   */
  function _treeDump($node, $indent = '') {
    $out = '';

    // Get rid of recursion: show the parent by name instead of following the reference.
    // The unset() comes first so that the parent is not overwritten through the reference.
    $parentName = empty($node['parentNode']) ? "SUPER ROOT" :  $node['parentNode']['name'];
    unset($node['parentNode']);
    $node['parentNode'] = $parentName ;

    $out .= "NODE[{$node['name']}]\n";

    foreach($node as $key => $val) {
      if ($key === 'childNodes') continue;
      if (is_Array($val)) {
        // arrayToStr() is not defined in this class. It is used when it is available,
        // otherwise print_r() stands in so that the dump does not abort.
        $rendered = function_exists('arrayToStr')
                    ? arrayToStr($val, $indent . ' ')
                    : print_r($val, TRUE);
        $out .= $indent . " [{$key}]\n" . $rendered;
      } else {
        $out .= $indent . " [{$key}] => '{$val}' \n";
      }
    }

    if (!empty($node['childNodes'])) {
      $out .= $indent . " ['childNodes'] (Size = ".sizeOf($node['childNodes']).")\n";
      foreach($node['childNodes'] as $key => $childNode) {
        $out .= $indent . " [$key] => " . $this->_treeDump($childNode, $indent . ' ') . "\n";
      }
    }

    if (empty($indent)) {
      return "<pre>" . htmlspecialchars($out) . "</pre>";
    }
    return $out;
  }
} // END OF CLASS XPathBase
/************************************************************************************************
* ===============================================================================================
* X P a t h E n g i n e - Class
* ===============================================================================================
************************************************************************************************/
class XPathEngine extends XPathBase {
// List of supported XPath axes.
//
// The W3C axis names contain a '-' (dash), which would conflict with the
// minus operator.
// NOTE: We therefore store the names with '_' in place of '-', and apply the
//       same replacement to the user's XPath queries:
//         -sibling => _sibling
//         -or-     => _or_
//
// This array contains a list of all valid axes that can be evaluated in an
// XPath query.
var $axes = array ( 'ancestor', 'ancestor_or_self', 'attribute', 'child', 'descendant',
'descendant_or_self', 'following', 'following_sibling',
'namespace', 'parent', 'preceding', 'preceding_sibling', 'self'
);
// List of supported XPath functions.
//
// Some W3C function names also contain a '-' (dash), which would conflict
// with the minus operator.
// NOTE: We therefore store the names with '_' in place of '-', and apply the
//       same replacement to the user's XPath queries:
//         starts-with      => starts_with
//         substring-before => substring_before
//         substring-after  => substring_after
//         string-length    => string_length
//
// This array contains a list of all valid functions that can be evaluated
// in an XPath query.
var $functions = array ( 'last', 'position', 'count', 'id', 'name',
'string', 'concat', 'starts_with', 'contains', 'substring_before',
'substring_after', 'substring', 'string_length', 'normalize_space', 'translate',
'boolean', 'not', 'true', 'false', 'lang', 'number', 'sum', 'floor',
'ceiling', 'round', 'x_lower', 'x_upper', 'generate_id' );
// List of supported XPath operators.
//
// This array contains a list of all valid operators that can be evaluated
// in a predicate of an XPath query. The list is ordered by the
// precedence of the operators (lowest precedence first), so the order of
// the entries is significant.
var $operators = array( ' or ', ' and ', '=', '!=', '<=', '<', '>=', '>',
'+', '-', '*', ' div ', ' mod ', ' | ');
// List of literals taken from the XPath string.
var $axPathLiterals = array();
// The index and the tree that are created during the analysis of an XML source.
var $nodeIndex = array();
var $nodeRoot = array();
// Template of a node; the keys below are the fields every node carries.
var $emptyNode = array(
'name' => '', // The tag name. E.g. in <FOO bar="aaa"/> it would be 'FOO'
'attributes' => array(), // The attributes of the tag. E.g. in <FOO bar="aaa"/> it would be array('bar'=>'aaa')
'childNodes' => array(), // Array of pointers to child nodes.
'textParts' => array(), // Array of text parts between the children. E.g. <FOO>aa<A>bb<B/>cc</A>dd</FOO> -> array('aa','bb','cc','dd')
'parentNode' => NULL, // Pointer to parent node or NULL if this node is the 'super root'
//-- *!* The following entries are set by the indexer and are for optimisation only *!*
'depth' => 0, // The tag depth (or tree level) starting with the root tag at 0.
'pos' => 0, // The zero-based position this node has in the parent's 'childNodes' list.
'contextPos' => 1, // The one-based position this node has when counting the sibling tags (tags with the same name)
'xpath' => '' // The absolute XPath to this node.
);
// TRUE when $nodeIndex no longer matches the node tree and has to be rebuilt.
var $_indexIsDirty = FALSE;
// These variables are used while parsing an XML source.
var $nodeStack = array(); // The elements that we still have to close.
var $parseStackIndex = 0; // The current element of the nodeStack[] that we are adding to while
// parsing an XML source. Corresponds to the depth of the xml node
// in our input data.
var $parseOptions = array(); // Used to set PHP's XML parser options (see xml_parser_set_option)
var $parsedTextLocation = ''; // A reference to where we have to put char data collected during XML parsing
var $parsInCData = 0 ; // Is >0 when we are inside a CDATA section.
var $parseSkipWhiteCache = 0; // A cache of the skip-whitespace parse option to speed up the parse.
// This is the array of error strings, kept in one place for consistency.
// The '%s' placeholder takes the offending XPath expression.
var $errorStrings = array(
'AbsoluteXPathRequired' => "The supplied xPath '%s' does not *uniquely* describe a node in the xml document.",
'NoNodeMatch' => "The supplied xPath-query '%s' does not match *any* node in the xml document.",
'RootNodeAlreadyExists' => "An xml document may have only one root node."
);
/**
 * Constructor
 *
 * Optionally takes a vector of XML parser options. Every entry of the vector
 * is stored and later handed to xml_parser_set_option().
 *
 * A sample option vector:
 *   $xmlOpt = array(XML_OPTION_CASE_FOLDING => FALSE,
 *                   XML_OPTION_SKIP_WHITE   => TRUE);
 *
 * Without overrides, case folding and whitespace skipping are both off.
 *
 * @param $userXmlOptions (array) (optional) Vector of (<optionID>=><value>,
 *                                <optionID>=><value>, ...). See PHP's
 *                                xml_parser_set_option() docu for a list of possible
 *                                options. A non-array value is ignored.
 * @see   importFromFile(), importFromString(), setXmlOptions()
 */
function XPathEngine($userXmlOptions=array()) {
  parent::XPathBase();

  // Built-in defaults: neither fold case nor skip whitespace.
  $this->parseOptions[XML_OPTION_CASE_FOLDING] = FALSE;
  $this->parseOptions[XML_OPTION_SKIP_WHITE]   = FALSE;

  if (!is_array($userXmlOptions)) return;

  // Apply the overrides key by key. Don't use PHP's array_merge() here: it
  // renumbers integer keys, and the option IDs are integers.
  foreach (array_keys($userXmlOptions) as $optionID) {
    $this->parseOptions[$optionID] = $userXmlOptions[$optionID];
  }
}
/**
 * Resets the object so it is able to take a new xml string/file
 *
 * Constructing objects is slow. If you can, reuse ones that you have used already
 * by using this reset() function.
 *
 * Clears the parse state, the node tree and its index, and the literal cache,
 * after letting the parent class reset its own state. The parser options
 * ($parseOptions) are left untouched.
 */
function reset() {
  parent::reset();
  $this->properties['xmlFile'] = '';
  // Parse state.
  $this->parseStackIndex = 0;
  $this->parsedTextLocation = '';
  $this->parsInCData = 0;
  // Node tree and index.
  $this->nodeIndex = array();
  $this->nodeRoot = array();
  $this->nodeStack = array();
  // The literal cache is declared as $axPathLiterals. Clearing an undeclared
  // '$aLiterals' here would leave the real cache untouched.
  $this->axPathLiterals = array();
  $this->_indexIsDirty = FALSE;
}
//-----------------------------------------------------------------------------------------
// XPathEngine ------ Get / Set Stuff ------
//-----------------------------------------------------------------------------------------
/**
 * Returns the property/ies you want.
 *
 * The entries 'hasContent', 'caseFolding' and 'skipWhiteSpaces' are refreshed
 * from the current object state on every call. If $param is not given (or is
 * empty), all properties are returned in a hash.
 *
 * @param  $param (string) the property you want the value of, or NULL for all the properties
 * @return        (mixed)  string OR hash of all params, or NULL on an unknown parameter.
 */
function getProperties($param=NULL) {
  $this->properties['hasContent']      = !empty($this->nodeRoot);
  $this->properties['caseFolding']     = $this->parseOptions[XML_OPTION_CASE_FOLDING];
  $this->properties['skipWhiteSpaces'] = $this->parseOptions[XML_OPTION_SKIP_WHITE];

  if (!empty($param)) {
    // A single property was asked for; unknown names yield NULL.
    return isSet($this->properties[$param]) ? $this->properties[$param] : NULL;
  }
  return $this->properties;
}
/**
 * Store one option for xml_parser_set_option()
 *
 * The value is only recorded in $parseOptions here. An option ID that is not
 * numeric is silently ignored.
 *
 * @param $optionID (int) The option ID (e.g. XML_OPTION_SKIP_WHITE)
 * @param $value    (int) The option value.
 * @see   XML parser functions in PHP doc
 */
function setXmlOption($optionID, $value) {
  if (is_numeric($optionID)) {
    $this->parseOptions[$optionID] = $value;
  }
}
/**
 * Store a number of options for xml_parser_set_option()
 *
 * Every entry is passed on to setXmlOption(). A non-array argument is ignored.
 *
 * @param $userXmlOptions (array) An array of parser options (<optionID> => <value>).
 * @see   setXmlOption
 */
function setXmlOptions($userXmlOptions=array()) {
  if (is_array($userXmlOptions)) {
    foreach ($userXmlOptions as $optionID => $optionValue) {
      $this->setXmlOption($optionID, $optionValue);
    }
  }
}
/**
 * Alternative way to control whether case-folding is enabled for this XML parser.
 *
 * Short cut to setXmlOption(XML_OPTION_CASE_FOLDING, TRUE/FALSE)
 *
 * When it comes to XML, case-folding simply means uppercasing all tag-
 * and attribute-names (NOT the content) if set to TRUE. Note that if you
 * have this option set, your XPath queries will also be case folded
 * for you.
 *
 * @param $onOff (bool) (default TRUE)
 * @see   XML parser functions in PHP doc
 */
function setCaseFolding($onOff=TRUE) {
  $optionID = XML_OPTION_CASE_FOLDING;
  $this->parseOptions[$optionID] = $onOff;
}
/**
 * Alternative way to control whether skip-white-spaces is enabled for this XML parser.
 *
 * Short cut to setXmlOption(XML_OPTION_SKIP_WHITE, TRUE/FALSE)
 *
 * When it comes to XML, skip-white-spaces will trim the tag content.
 * An XML file with no whitespace will be faster to process, but will make
 * your data less human readable when you come to write it out.
 *
 * Running with this option on will slow the class down. If you want to
 * speed up your XML, run it through once with skip-white-spaces on, write
 * out the new version of your XML without whitespace, and then use the
 * new XML file with skip-white-spaces turned off.
 *
 * @param $onOff (bool) (default TRUE)
 * @see   XML parser functions in PHP doc
 */
function setSkipWhiteSpaces($onOff=TRUE) {
  $optionID = XML_OPTION_SKIP_WHITE;
  $this->parseOptions[$optionID] = $onOff;
}
/**
 * Get the node defined by the $absoluteXPath.
 *
 * The node is returned by reference, so changes made through the returned
 * value affect the node stored in the index.
 *
 * @param  $absoluteXPath (string) (optional, default is 'super-root') xpath to the node.
 *                                 '/' is treated like '' (the super-root).
 * @return                (array)  The node, or FALSE if the node wasn't found.
 */
function &getNode($absoluteXPath='') {
  if ($absoluteXPath==='/') $absoluteXPath = '';
  // Bring a stale index up to date *before* consulting it. Otherwise the
  // existence check below would be answered from outdated data.
  if ($this->_indexIsDirty) $this->reindexNodeTree();
  if (!isSet($this->nodeIndex[$absoluteXPath])) {
    // A function returning by reference must return a variable, not a literal.
    $notFound = FALSE;
    return $notFound;
  }
  return $this->nodeIndex[$absoluteXPath];
}
/**
* Get the content of a node text part or node attribute.
*
* If the absolute Xpath references an attribute (Xpath ends with @ or attribute::),
* then the text value of that node-attribute is returned.
* Otherwise the Xpath is referencing a text part of the node. This can be either a
* direct reference to a text part (Xpath ends with text()[<nr>]) or indirect reference
* (a simple abs. Xpath to a node).
* 1) Direct Reference (xpath ends with text()[<part-number>]):
* If the 'part-number' is omitted, the first text-part is assumed; starting by 1.
* Negative numbers are allowed, where -1 is the last text-part a.s.o.
* 2) Indirect Reference (a simple abs. Xpath to a node):
* Default is to return the *whole text*; that is the concatenated text-parts of the matching
* node. (NOTE that in this case you only get a copy, so changes to the returned
* value would have no effect). Optionally you may pass a parameter
* $textPartNr to define the text-part you want; starting by 1.
* Negative numbers are allowed, where -1 is the last text-part a.s.o.
*
* NOTE I : The returned value can be fetched by reference
* E.g. $text =& wholeText(). If you wish to modify the text.
* NOTE II: text-part numbers out of range will return FALSE
* SIDENOTE:The function name is a suggestion from W3C in the XPath specification level 3.