/
TreeBuilder.java
6673 lines (6211 loc) · 272 KB
/
TreeBuilder.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2007-2017 Mozilla Foundation
* Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla
* Foundation, and Opera Software ASA.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* The comments following this one that use the same comment syntax as this
* comment are quotes from the WHATWG HTML 5 spec as of 27 June 2007
* amended as of June 28 2007.
* That document came with this statement:
* "© Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, and
* Opera Software ASA. You are granted a license to use, reproduce and
* create derivative works of this document."
*/
package nu.validator.htmlparser.impl;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.xml.sax.ErrorHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import nu.validator.htmlparser.annotation.Auto;
import nu.validator.htmlparser.annotation.Const;
import nu.validator.htmlparser.annotation.IdType;
import nu.validator.htmlparser.annotation.Inline;
import nu.validator.htmlparser.annotation.Literal;
import nu.validator.htmlparser.annotation.Local;
import nu.validator.htmlparser.annotation.NoLength;
import nu.validator.htmlparser.annotation.NsUri;
import nu.validator.htmlparser.common.DocumentMode;
import nu.validator.htmlparser.common.DocumentModeHandler;
import nu.validator.htmlparser.common.Interner;
import nu.validator.htmlparser.common.TokenHandler;
import nu.validator.htmlparser.common.XmlViolationPolicy;
public abstract class TreeBuilder<T> implements TokenHandler,
TreeBuilderState<T> {
/**
* Array version of U+FFFD (REPLACEMENT CHARACTER): a one-element char
* array, so the character can be handed to APIs that take a char buffer.
*/
private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
// Start dispatch groups
// Integer group identifiers; each constant is named after the element (or
// the set of elements joined by _OR_) that it stands for, with OTHER = 0.
// Every value in this list is distinct. The values 21 and 54 are not
// assigned to any constant here, so the numbering is not contiguous.
// NOTE(review): presumably these are the values tree-builder switch
// statements dispatch on; the consumers are outside this part of the file.
final static int OTHER = 0;
final static int A = 1;
final static int BASE = 2;
final static int BODY = 3;
final static int BR = 4;
final static int BUTTON = 5;
final static int CAPTION = 6;
final static int COL = 7;
final static int COLGROUP = 8;
final static int FORM = 9;
final static int FRAME = 10;
final static int FRAMESET = 11;
final static int IMAGE = 12;
final static int INPUT = 13;
final static int RT_OR_RP = 14;
final static int LI = 15;
final static int LINK_OR_BASEFONT_OR_BGSOUND = 16;
final static int MATH = 17;
final static int META = 18;
final static int SVG = 19;
final static int HEAD = 20;
final static int HR = 22;
final static int HTML = 23;
final static int NOBR = 24;
final static int NOFRAMES = 25;
final static int NOSCRIPT = 26;
final static int OPTGROUP = 27;
final static int OPTION = 28;
final static int P = 29;
final static int PLAINTEXT = 30;
final static int SCRIPT = 31;
final static int SELECT = 32;
final static int STYLE = 33;
final static int TABLE = 34;
final static int TEXTAREA = 35;
final static int TITLE = 36;
final static int TR = 37;
final static int XMP = 38;
final static int TBODY_OR_THEAD_OR_TFOOT = 39;
final static int TD_OR_TH = 40;
final static int DD_OR_DT = 41;
final static int H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 = 42;
final static int MARQUEE_OR_APPLET = 43;
final static int PRE_OR_LISTING = 44;
final static int B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U = 45;
final static int UL_OR_OL_OR_DL = 46;
final static int IFRAME = 47;
final static int EMBED = 48;
final static int AREA_OR_WBR = 49;
final static int DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU = 50;
final static int ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SEARCH_OR_SECTION_OR_SUMMARY = 51;
final static int RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR = 52;
final static int RB_OR_RTC = 53;
final static int PARAM_OR_SOURCE_OR_TRACK = 55;
final static int MGLYPH_OR_MALIGNMARK = 56;
final static int MI_MO_MN_MS_MTEXT = 57;
final static int ANNOTATION_XML = 58;
final static int FOREIGNOBJECT_OR_DESC = 59;
final static int NOEMBED = 60;
final static int FIELDSET = 61;
final static int OUTPUT = 62;
final static int OBJECT = 63;
final static int FONT = 64;
final static int KEYGEN = 65;
final static int TEMPLATE = 66;
final static int IMG = 67;
// start insertion modes
// Values held by the mode field (and assigned to originalMode). The
// token-handling methods switch on mode using these constants.
// FRAMESET_OK is listed here as a mode of its own; startTokenization
// assigns it to mode for SVG and MathML fragment contexts.
// NOTE(review): the "fall-through" remarks below presumably refer to the
// ordering of case labels in switch statements outside this part of the
// file, which would make the numeric order significant - confirm before
// renumbering.
private static final int IN_ROW = 0;
private static final int IN_TABLE_BODY = 1;
private static final int IN_TABLE = 2;
private static final int IN_CAPTION = 3;
private static final int IN_CELL = 4;
private static final int FRAMESET_OK = 5;
private static final int IN_BODY = 6;
private static final int IN_HEAD = 7;
private static final int IN_HEAD_NOSCRIPT = 8;
// no fall-through
private static final int IN_COLUMN_GROUP = 9;
// no fall-through
private static final int IN_SELECT_IN_TABLE = 10;
private static final int IN_SELECT = 11;
// no fall-through
private static final int AFTER_BODY = 12;
// no fall-through
private static final int IN_FRAMESET = 13;
private static final int AFTER_FRAMESET = 14;
// no fall-through
private static final int INITIAL = 15;
// could add fall-through
private static final int BEFORE_HTML = 16;
// could add fall-through
private static final int BEFORE_HEAD = 17;
// no fall-through
private static final int AFTER_HEAD = 18;
// no fall-through
private static final int AFTER_AFTER_BODY = 19;
// no fall-through
private static final int AFTER_AFTER_FRAMESET = 20;
// no fall-through
private static final int TEXT = 21;
private static final int IN_TEMPLATE = 22;
// start charset states
// One state per letter of the word "charset" (C, H, A, R, S, E, T), then
// the equals sign, then the three ways the value may be quoted.
// NOTE(review): presumably the states of a scanner that looks for a
// charset declaration inside an attribute value; the code that uses them
// is outside this part of the file.
private static final int CHARSET_INITIAL = 0;
private static final int CHARSET_C = 1;
private static final int CHARSET_H = 2;
private static final int CHARSET_A = 3;
private static final int CHARSET_R = 4;
private static final int CHARSET_S = 5;
private static final int CHARSET_E = 6;
private static final int CHARSET_T = 7;
private static final int CHARSET_EQUALS = 8;
private static final int CHARSET_SINGLE_QUOTED = 9;
private static final int CHARSET_DOUBLE_QUOTED = 10;
private static final int CHARSET_UNQUOTED = 11;
// end pseudo enums
// Table of doctype public identifiers, written entirely in lower case and
// each ending in "//".
// NOTE(review): presumably these are prefixes that isQuirky() matches
// case-insensitively against a doctype's public identifier to select
// quirks mode; isQuirky() is outside this part of the file - confirm.
@Literal private final static String[] QUIRKY_PUBLIC_IDS = {
"+//silmaril//dtd html pro v0r11 19970101//",
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
"-//as//dtd html 3.0 aswedit + extensions//",
"-//ietf//dtd html 2.0 level 1//",
"-//ietf//dtd html 2.0 level 2//",
"-//ietf//dtd html 2.0 strict level 1//",
"-//ietf//dtd html 2.0 strict level 2//",
"-//ietf//dtd html 2.0 strict//",
"-//ietf//dtd html 2.0//",
"-//ietf//dtd html 2.1e//",
"-//ietf//dtd html 3.0//",
"-//ietf//dtd html 3.2 final//",
"-//ietf//dtd html 3.2//",
"-//ietf//dtd html 3//",
"-//ietf//dtd html level 0//",
"-//ietf//dtd html level 1//",
"-//ietf//dtd html level 2//",
"-//ietf//dtd html level 3//",
"-//ietf//dtd html strict level 0//",
"-//ietf//dtd html strict level 1//",
"-//ietf//dtd html strict level 2//",
"-//ietf//dtd html strict level 3//",
"-//ietf//dtd html strict//",
"-//ietf//dtd html//",
"-//metrius//dtd metrius presentational//",
"-//microsoft//dtd internet explorer 2.0 html strict//",
"-//microsoft//dtd internet explorer 2.0 html//",
"-//microsoft//dtd internet explorer 2.0 tables//",
"-//microsoft//dtd internet explorer 3.0 html strict//",
"-//microsoft//dtd internet explorer 3.0 html//",
"-//microsoft//dtd internet explorer 3.0 tables//",
"-//netscape comm. corp.//dtd html//",
"-//netscape comm. corp.//dtd strict html//",
"-//o'reilly and associates//dtd html 2.0//",
"-//o'reilly and associates//dtd html extended 1.0//",
"-//o'reilly and associates//dtd html extended relaxed 1.0//",
"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
"-//spyglass//dtd html 2.0 extended//",
"-//sq//dtd html 2.0 hotmetal + extensions//",
"-//sun microsystems corp.//dtd hotjava html//",
"-//sun microsystems corp.//dtd hotjava strict html//",
"-//w3c//dtd html 3 1995-03-24//", "-//w3c//dtd html 3.2 draft//",
"-//w3c//dtd html 3.2 final//", "-//w3c//dtd html 3.2//",
"-//w3c//dtd html 3.2s draft//", "-//w3c//dtd html 4.0 frameset//",
"-//w3c//dtd html 4.0 transitional//",
"-//w3c//dtd html experimental 19960712//",
"-//w3c//dtd html experimental 970421//", "-//w3c//dtd w3 html//",
"-//w3o//dtd w3 html 3.0//", "-//webtechs//dtd mozilla html 2.0//",
"-//webtechs//dtd mozilla html//" };
// Sentinel equal to Integer.MAX_VALUE.
// NOTE(review): presumably what the stack-search helpers return when no
// matching element is found; those helpers are outside this part of the file.
private static final int NOT_FOUND_ON_STACK = Integer.MAX_VALUE;
// [NOCPP[
private static final @Local String HTML_LOCAL = "html";
// ]NOCPP]
// Current insertion mode; always one of the insertion mode constants.
private int mode = INITIAL;
// Reset to INITIAL by startTokenization().
// NOTE(review): presumably the mode to return to when leaving TEXT - confirm.
private int originalMode = INITIAL;
/**
* Used only when moving back to IN_BODY.
*/
private boolean framesetOk = true;
// Assigned in startTokenization(); also passed as the locator of the
// SAXParseExceptions built by fatal(), errNoCheck() and warn().
protected Tokenizer tokenizer;
// [NOCPP[
// Receiver for fatal errors, errors and warnings. May be null, in which
// case err() and warn() return without doing anything.
protected ErrorHandler errorHandler;
private DocumentModeHandler documentModeHandler;
// ]NOCPP]
// Read in startTokenization(): a "noscript" fragment context switches the
// tokenizer to RAWTEXT only when this flag is true.
private boolean scriptingEnabled = false;
// When true, the next characters() call skips a leading line feed.
// Cleared by startTokenization(), doctype(), comment() and characters().
private boolean needToDropLF;
// [NOCPP[
// Result of wantsComments() captured in startTokenization(); comment()
// ignores comment tokens while this is false.
private boolean wantingComments;
// ]NOCPP]
// Fragment parsing: whether a fragment is being parsed, plus the local
// name, namespace URI and node of the context element. startTokenization()
// compares contextName and contextNamespace with == against literals.
private boolean fragment;
private @Local String contextName;
private @NsUri String contextNamespace;
private T contextNode;
/**
* Stack of template insertion modes
*/
private @Auto int[] templateModeStack;
/**
* Current template mode stack pointer.
*/
private int templateModePtr = -1;
private @Auto StackNode<T>[] stackNodes;
/**
* Index of the earliest possible unused or empty element in stackNodes.
*/
private int stackNodesIdx = -1;
private int numStackNodes = 0;
// Stack of open elements. currentPtr is the index of the current node and
// is -1 while the stack is empty.
private @Auto StackNode<T>[] stack;
private int currentPtr = -1;
// List of active formatting elements; listPtr is reset to -1 whenever
// startTokenization() allocates a fresh list.
private @Auto StackNode<T>[] listOfActiveFormattingElements;
private int listPtr = -1;
private T formPointer;
private T headPointer;
// Character buffer and its fill level; startTokenization() sets the buffer
// to null and the length to 0.
protected @Auto char[] charBuffer;
protected int charBufferLen = 0;
private boolean quirks = false;
private boolean forceNoQuirks = false;
private boolean allowDeclarativeShadowRoots = false;
// [NOCPP[
// When false, doctype() does not call appendDoctypeToDocument().
private boolean reportingDoctype = true;
private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET;
// Emptied at the start of every tokenization run.
private final Map<String, LocatorImpl> idLocations = new HashMap<String, LocatorImpl>();
// ]NOCPP]
/**
 * Creates a tree builder with fragment parsing switched off.
 */
protected TreeBuilder() {
    this.fragment = false;
}
/**
* Reports a condition that would make the infoset incompatible with XML
* 1.0 as fatal.
*
* The implementation in this class has an empty body and therefore does
* nothing; the method is neither final nor private, so a subclass can
* override it.
*
* @throws SAXException
* @throws SAXParseException
*/
protected void fatal() throws SAXException {
}
// CPPONLY: @Inline private @Creator Object htmlCreator(@HtmlCreator Object htmlCreator) {
// CPPONLY: @Creator Object creator;
// CPPONLY: creator.html = htmlCreator;
// CPPONLY: return creator;
// CPPONLY: }
// CPPONLY:
// CPPONLY: @Inline private @Creator Object svgCreator(@SvgCreator Object svgCreator) {
// CPPONLY: @Creator Object creator;
// CPPONLY: creator.svg = svgCreator;
// CPPONLY: return creator;
// CPPONLY: }
// [NOCPP[
/**
 * Wraps the given exception in a {@link SAXParseException} that uses the
 * tokenizer as its locator, hands it to the error handler as a fatal
 * error when a handler is set, and then throws it in every case.
 *
 * @param e
 *            the exception to wrap; its message becomes the message of
 *            the thrown exception and it is kept as the cause
 * @throws SAXException
 *             always
 */
protected final void fatal(Exception e) throws SAXException {
    final SAXParseException wrapped = new SAXParseException(
            e.getMessage(), tokenizer, e);
    if (null != errorHandler) {
        errorHandler.fatalError(wrapped);
    }
    throw wrapped;
}
/**
 * Builds a {@link SAXParseException} with the given message and the
 * tokenizer as its locator, hands it to the error handler as a fatal
 * error when a handler is set, and then throws it in every case.
 *
 * @param s
 *            the message
 * @throws SAXException
 *             always
 */
final void fatal(String s) throws SAXException {
    final SAXParseException fatalError = new SAXParseException(s,
            tokenizer);
    if (null != errorHandler) {
        errorHandler.fatalError(fatalError);
    }
    throw fatalError;
}
/**
 * Reports a parse error, provided an error handler is set. Without a
 * handler the call does nothing.
 *
 * @param message
 *            the message
 * @throws SAXException
 *             if reporting the error throws
 */
final void err(String message) throws SAXException {
    if (errorHandler != null) {
        errNoCheck(message);
    }
}
/**
 * Reports a parse error to the error handler without first testing
 * whether a handler is set; the caller is responsible for that test.
 *
 * @param message
 *            the message
 * @throws SAXException
 *             if the error handler throws
 */
final void errNoCheck(String message) throws SAXException {
    errorHandler.error(new SAXParseException(message, tokenizer));
}
/**
 * Walks the stack of open elements from the current node downwards and
 * reports every entry above {@code eltPos} as unclosed. Does nothing when
 * the stack is empty.
 *
 * @param eltPos
 *            stack index at which reporting stops; the entry at this
 *            index is not reported
 * @throws SAXException
 *             if reporting throws
 */
private void errListUnclosedStartTags(int eltPos) throws SAXException {
    if (currentPtr == -1) {
        return;
    }
    int pos = currentPtr;
    while (pos > eltPos) {
        reportUnclosedElementNameAndLocation(pos);
        pos--;
    }
}
/**
 * Reports the name and location of an unclosed element, at most once per
 * element.
 *
 * Nothing is reported when no error handler is set, when
 * {@code isOptionalEndTag()} is true for the node, when the node carries
 * no locator, or when the locator is already tainted. The locator is
 * tainted right before reporting, which is what prevents a second report
 * for the same node.
 *
 * @param pos
 *            index of the element in the stack of open elements
 * @throws SAXException
 *             if the error handler throws
 */
private final void reportUnclosedElementNameAndLocation(int pos) throws SAXException {
    if (errorHandler == null) {
        // Nobody to report to; avoids a NullPointerException below.
        return;
    }
    StackNode<T> node = stack[pos];
    if (node.isOptionalEndTag()) {
        return;
    }
    TaintableLocatorImpl locator = node.getLocator();
    // startTokenization() creates stack nodes with a null locator while no
    // error handler is set, so a node may legitimately have none.
    if (locator == null || locator.isTainted()) {
        return;
    }
    locator.markTainted();
    SAXParseException spe = new SAXParseException(
            "Unclosed element \u201C" + node.popName + "\u201D.", locator);
    errorHandler.error(spe);
}
/**
 * Reports a warning located at the tokenizer, provided an error handler
 * is set. Without a handler the call does nothing.
 *
 * @param message
 *            the message
 * @throws SAXException
 *             if the error handler throws
 */
final void warn(String message) throws SAXException {
    if (errorHandler != null) {
        errorHandler.warning(new SAXParseException(message, tokenizer));
    }
}
/**
 * Reports a warning located at the given locator, provided an error
 * handler is set. Without a handler the call does nothing.
 *
 * @param message
 *            the message
 * @param locator
 *            the location to attach to the warning
 * @throws SAXException
 *             if the error handler throws
 */
final void warn(String message, Locator locator) throws SAXException {
    if (errorHandler != null) {
        errorHandler.warning(new SAXParseException(message, locator));
    }
}
// ]NOCPP]
/**
* Prepares this tree builder for a new tokenization run.
*
* Stores the tokenizer, allocates fresh 64-slot arrays for the stack node
* array, the stack of open elements, the template mode stack and the list
* of active formatting elements, resets the related pointers and flags,
* and calls {@code start(fragment)}. When a fragment is being parsed, the
* first stack entry is then built from the context (SVG, MathML or HTML)
* and the tokenizer state and insertion mode are chosen to match it;
* otherwise the insertion mode is set to {@code INITIAL}.
*
* @param self
*            the tokenizer to associate with this tree builder
* @throws SAXException
*             declared by the signature; presumably propagated from the
*             callbacks invoked here
*/
@SuppressWarnings("unchecked") public final void startTokenization(Tokenizer self) throws SAXException {
tokenizer = self;
// Fresh storage for every run; the generic arrays are created raw, hence
// the unchecked-warning suppression on the method.
stackNodes = new StackNode[64];
stack = new StackNode[64];
templateModeStack = new int[64];
listOfActiveFormattingElements = new StackNode[64];
needToDropLF = false;
originalMode = INITIAL;
templateModePtr = -1;
stackNodesIdx = 0;
numStackNodes = 0;
currentPtr = -1;
listPtr = -1;
formPointer = null;
headPointer = null;
// [NOCPP[
idLocations.clear();
wantingComments = wantsComments();
// ]NOCPP]
// start(...) is defined outside this part of the file.
start(fragment);
charBufferLen = 0;
charBuffer = null;
framesetOk = true;
if (fragment) {
// Root for the fragment: the supplied context node if there is one,
// otherwise a newly created html element.
T elt;
if (contextNode != null) {
elt = contextNode;
} else {
elt = createHtmlElementSetAsRoot(tokenizer.emptyAttributes());
}
// When the context node is not in the HTML namespace, contrary
// to the spec, the first node on the stack is not set to "html"
// in the HTML namespace. Instead, it is set to a node that has
// the characteristics of the appropriate "adjusted current node".
// This way, there is no need to perform "adjusted current node"
// checks during tree construction. Instead, it's sufficient to
// just look at the current node. However, this also means that it
// is not safe to treat "html" in the HTML namespace as a sentinel
// that ends stack popping. Instead, stack popping loops that are
// meant not to pop the first element on the stack need to check
// for currentPos becoming zero.
// NOTE(review): the string comparisons below use == (reference
// equality). Presumably contextNamespace and contextName are interned
// (@NsUri / @Local) so that this is reliable - confirm before changing
// any of them to equals().
if (contextNamespace == "http://www.w3.org/2000/svg") {
ElementName elementName = ElementName.SVG;
if ("title" == contextName || "desc" == contextName
|| "foreignObject" == contextName) {
// These elements are all alike and we don't care about
// the exact name.
elementName = ElementName.FOREIGNOBJECT;
}
// This is the SVG variant of the StackNode constructor.
StackNode<T> node = createStackNode(elementName,
elementName.getCamelCaseName(), elt
// [NOCPP[
// The locator is only created when an error handler is set.
, errorHandler == null ? null
: new TaintableLocatorImpl(tokenizer)
// ]NOCPP]
);
currentPtr++;
stack[currentPtr] = node;
tokenizer.setState(Tokenizer.DATA);
// The frameset-ok flag is set even though <frameset> never
// ends up being allowed as HTML frameset in the fragment case.
mode = FRAMESET_OK;
} else if (contextNamespace == "http://www.w3.org/1998/Math/MathML") {
ElementName elementName = ElementName.MATH;
if ("mi" == contextName || "mo" == contextName
|| "mn" == contextName || "ms" == contextName
|| "mtext" == contextName) {
// These elements are all alike and we don't care about
// the exact name.
elementName = ElementName.MTEXT;
} else if ("annotation-xml" == contextName) {
elementName = ElementName.ANNOTATION_XML;
// Blink does not check the encoding attribute of the
// annotation-xml element innerHTML is being set on.
// Let's do the same at least until
// https://www.w3.org/Bugs/Public/show_bug.cgi?id=26783
// is resolved.
}
// This is the MathML variant of the StackNode constructor.
StackNode<T> node = createStackNode(elementName, elt,
elementName.getName(), false
// [NOCPP[
// The locator is only created when an error handler is set.
, errorHandler == null ? null
: new TaintableLocatorImpl(tokenizer)
// ]NOCPP]
);
currentPtr++;
stack[currentPtr] = node;
tokenizer.setState(Tokenizer.DATA);
// The frameset-ok flag is set even though <frameset> never
// ends up being allowed as HTML frameset in the fragment case.
mode = FRAMESET_OK;
} else { // html
StackNode<T> node = createStackNode(ElementName.HTML, elt
// [NOCPP[
// The locator is only created when an error handler is set.
, errorHandler == null ? null
: new TaintableLocatorImpl(tokenizer)
// ]NOCPP]
);
currentPtr++;
stack[currentPtr] = node;
if ("template" == contextName) {
pushTemplateMode(IN_TEMPLATE);
}
resetTheInsertionMode();
formPointer = getFormPointerForContext(contextNode);
// Pick the tokenizer state that matches the content model of the
// context element.
if ("title" == contextName || "textarea" == contextName) {
tokenizer.setState(Tokenizer.RCDATA);
} else if ("style" == contextName || "xmp" == contextName
|| "iframe" == contextName || "noembed" == contextName
|| "noframes" == contextName
|| (scriptingEnabled && "noscript" == contextName)) {
tokenizer.setState(Tokenizer.RAWTEXT);
} else if ("plaintext" == contextName) {
tokenizer.setState(Tokenizer.PLAINTEXT);
} else if ("script" == contextName) {
tokenizer.setState(Tokenizer.SCRIPT_DATA);
} else {
tokenizer.setState(Tokenizer.DATA);
}
}
} else {
mode = INITIAL;
// If we are viewing XML source, put a foreign element permanently
// on the stack so that cdataSectionAllowed() returns true.
// CPPONLY: if (tokenizer.isViewingXmlSource()) {
// CPPONLY: T elt = createElement("http://www.w3.org/2000/svg",
// CPPONLY: "svg",
// CPPONLY: tokenizer.emptyAttributes(), null,
// CPPONLY: svgCreator(NS_NewSVGSVGElement));
// CPPONLY: StackNode<T> node = createStackNode(ElementName.SVG,
// CPPONLY: "svg",
// CPPONLY: elt);
// CPPONLY: currentPtr++;
// CPPONLY: stack[currentPtr] = node;
// CPPONLY: }
}
}
/**
 * Handles a DOCTYPE token.
 *
 * Outside foreign content and in the {@code INITIAL} mode, the doctype is
 * appended to the document (when doctype reporting is on), the document
 * mode is derived from the identifiers, and the insertion mode moves on
 * to {@code BEFORE_HTML}. In every other situation the token is reported
 * as a stray doctype and ignored.
 *
 * @param name
 *            the doctype name, may be null
 * @param publicIdentifier
 *            the public identifier, may be null
 * @param systemIdentifier
 *            the system identifier, may be null
 * @param forceQuirks
 *            passed on to the quirks-mode test
 * @throws SAXException
 *             if reporting or a document callback throws
 */
public final void doctype(@Local String name, String publicIdentifier,
        String systemIdentifier, boolean forceQuirks) throws SAXException {
    needToDropLF = false;
    if (isInForeign() || mode != INITIAL) {
        /*
         * A DOCTYPE token Parse error. Ignore the token.
         */
        errStrayDoctype();
        return;
    }
    // [NOCPP[
    if (reportingDoctype) {
        // ]NOCPP]
        String blank = Portability.newEmptyString();
        appendDoctypeToDocument(
                name == null ? "" : name,
                publicIdentifier == null ? blank : publicIdentifier,
                systemIdentifier == null ? blank : systemIdentifier);
        Portability.releaseString(blank);
        // [NOCPP[
    }
    // ]NOCPP]
    if (isQuirky(name, publicIdentifier, systemIdentifier, forceQuirks)) {
        errQuirkyDoctype();
        documentModeInternal(DocumentMode.QUIRKS_MODE, publicIdentifier,
                systemIdentifier);
    } else if (isAlmostStandards(publicIdentifier, systemIdentifier)) {
        errAlmostStandardsDoctype();
        documentModeInternal(DocumentMode.ALMOST_STANDARDS_MODE,
                publicIdentifier, systemIdentifier);
    } else {
        // [NOCPP[
        if ((Portability.literalEqualsString("-//W3C//DTD HTML 4.0//EN",
                publicIdentifier)
                && (systemIdentifier == null
                        || Portability.literalEqualsString(
                                "http://www.w3.org/TR/REC-html40/strict.dtd",
                                systemIdentifier)))
                || (Portability.literalEqualsString(
                        "-//W3C//DTD HTML 4.01//EN", publicIdentifier)
                        && (systemIdentifier == null
                                || Portability.literalEqualsString(
                                        "http://www.w3.org/TR/html4/strict.dtd",
                                        systemIdentifier)))
                || (Portability.literalEqualsString(
                        "-//W3C//DTD XHTML 1.0 Strict//EN", publicIdentifier)
                        && Portability.literalEqualsString(
                                "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd",
                                systemIdentifier))
                || (Portability.literalEqualsString(
                        "-//W3C//DTD XHTML 1.1//EN", publicIdentifier)
                        && Portability.literalEqualsString(
                                "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd",
                                systemIdentifier))) {
            err("Obsolete doctype. Expected \u201C<!DOCTYPE html>\u201D.");
        } else if ((systemIdentifier != null
                && !Portability.literalEqualsString("about:legacy-compat",
                        systemIdentifier))
                || publicIdentifier != null) {
            // Anything other than a doctype with no public identifier and
            // either no system identifier or about:legacy-compat.
            err("Legacy doctype. Expected \u201C<!DOCTYPE html>\u201D.");
        }
        // ]NOCPP]
        documentModeInternal(DocumentMode.STANDARDS_MODE, publicIdentifier,
                systemIdentifier);
    }
    /*
     * Then, switch to the root element mode of the tree construction
     * stage.
     */
    mode = BEFORE_HTML;
}
/**
 * Handles a comment token.
 *
 * Comment tokens are dropped when comments are not wanted. Outside
 * foreign content the comment goes to the document in the
 * {@code INITIAL}, {@code BEFORE_HTML}, {@code AFTER_AFTER_BODY} and
 * {@code AFTER_AFTER_FRAMESET} modes, and to the first element on the
 * stack in the {@code AFTER_BODY} mode. In every other case it is
 * appended to the current node.
 *
 * @param buf
 *            buffer holding the comment data
 * @param start
 *            offset of the first character of the comment in the buffer
 * @param length
 *            number of characters in the comment
 * @throws SAXException
 *             if a tree callback throws
 */
public final void comment(@NoLength char[] buf, int start, int length)
        throws SAXException {
    needToDropLF = false;
    // [NOCPP[
    if (!wantingComments) {
        return;
    }
    // ]NOCPP]
    if (!isInForeign()) {
        if (mode == INITIAL || mode == BEFORE_HTML
                || mode == AFTER_AFTER_BODY
                || mode == AFTER_AFTER_FRAMESET) {
            /*
             * A comment token Append a Comment node to the Document
             * object with the data attribute set to the data given in
             * the comment token.
             */
            appendCommentToDocument(buf, start, length);
            return;
        }
        if (mode == AFTER_BODY) {
            /*
             * A comment token Append a Comment node to the first
             * element in the stack of open elements (the html element),
             * with the data attribute set to the data given in the
             * comment token.
             */
            flushCharacters();
            appendComment(stack[0].node, buf, start, length);
            return;
        }
    }
    /*
     * A comment token Append a Comment node to the current node with the
     * data attribute set to the data given in the comment token.
     */
    flushCharacters();
    appendComment(stack[currentPtr].node, buf, start, length);
}
/**
* @see nu.validator.htmlparser.common.TokenHandler#characters(char[], int,
* int)
*/
public final void characters(@Const @NoLength char[] buf, int start, int length)
throws SAXException {
// Note: Can't attach error messages to EOF in C++ yet
// CPPONLY: if (tokenizer.isViewingXmlSource()) {
// CPPONLY: return;
// CPPONLY: }
if (needToDropLF) {
needToDropLF = false;
if (buf[start] == '\n') {
start++;
length--;
if (length == 0) {
return;
}
}
}
// optimize the most common case
switch (mode) {
case IN_BODY:
case IN_CELL:
case IN_CAPTION:
if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
reconstructTheActiveFormattingElements();
}
// CPPONLY: MOZ_FALLTHROUGH;
case TEXT:
accumulateCharacters(buf, start, length);
return;
case IN_TABLE:
case IN_TABLE_BODY:
case IN_ROW:
accumulateCharactersForced(buf, start, length);
return;
default:
int end = start + length;
charactersloop: for (int i = start; i < end; i++) {
switch (buf[i]) {
case ' ':
case '\t':
case '\n':
case '\r':
case '\u000C':
/*
* A character token that is one of one of U+0009
* CHARACTER TABULATION, U+000A LINE FEED (LF),
* U+000C FORM FEED (FF), or U+0020 SPACE
*/
switch (mode) {
case INITIAL:
case BEFORE_HTML:
case BEFORE_HEAD:
/*
* Ignore the token.
*/
start = i + 1;
continue;
case IN_HEAD:
case IN_HEAD_NOSCRIPT:
case AFTER_HEAD:
case IN_COLUMN_GROUP:
case IN_FRAMESET:
case AFTER_FRAMESET:
/*
* Append the character to the current node.
*/
continue;
case FRAMESET_OK:
case IN_TEMPLATE:
case IN_BODY:
case IN_CELL:
case IN_CAPTION:
if (start < i) {
accumulateCharacters(buf, start, i
- start);
start = i;
}
/*
* Reconstruct the active formatting
* elements, if any.
*/
if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
flushCharacters();
reconstructTheActiveFormattingElements();
}
/*
* Append the token's character to the
* current node.
*/
break charactersloop;
case IN_SELECT:
case IN_SELECT_IN_TABLE:
break charactersloop;
case IN_TABLE:
case IN_TABLE_BODY:
case IN_ROW:
accumulateCharactersForced(buf, i, 1);
start = i + 1;
continue;
case AFTER_BODY:
case AFTER_AFTER_BODY:
case AFTER_AFTER_FRAMESET:
if (start < i) {
accumulateCharacters(buf, start, i
- start);
start = i;
}
/*
* Reconstruct the active formatting
* elements, if any.
*/
flushCharacters();
reconstructTheActiveFormattingElements();
/*
* Append the token's character to the
* current node.
*/
continue;
}
// CPPONLY: MOZ_FALLTHROUGH_ASSERT();
default:
/*
* A character token that is not one of one of
* U+0009 CHARACTER TABULATION, U+000A LINE FEED
* (LF), U+000C FORM FEED (FF), or U+0020 SPACE
*/
switch (mode) {
case INITIAL:
/*
* Parse error.
*/
// [NOCPP[
// XXX figure out a way to report this in the Gecko View Source case
err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
// ]NOCPP]
/*
*
* Set the document to quirks mode.
*/
documentModeInternal(
DocumentMode.QUIRKS_MODE, null,
null);
/*
* Then, switch to the root element mode of
* the tree construction stage
*/
mode = BEFORE_HTML;
/*
* and reprocess the current token.
*/
i--;
continue;