Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
1 /*
2 * Copyright (c) 2007 Henri Sivonen
3 * Copyright (c) 2007-2011 Mozilla Foundation
4 * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla
5 * Foundation, and Opera Software ASA.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
26 /*
27 * The comments following this one that use the same comment syntax as this
28 * comment are quotes from the WHATWG HTML 5 spec as of 27 June 2007
29 * amended as of June 28 2007.
30 * That document came with this statement:
31 * "© Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, and
32 * Opera Software ASA. You are granted a license to use, reproduce and
33 * create derivative works of this document."
34 */
36 package nu.validator.htmlparser.impl;
38 import java.util.Arrays;
39 import java.util.HashMap;
40 import java.util.Map;
42 import nu.validator.htmlparser.annotation.Auto;
43 import nu.validator.htmlparser.annotation.Const;
44 import nu.validator.htmlparser.annotation.IdType;
45 import nu.validator.htmlparser.annotation.Inline;
46 import nu.validator.htmlparser.annotation.Literal;
47 import nu.validator.htmlparser.annotation.Local;
48 import nu.validator.htmlparser.annotation.NoLength;
49 import nu.validator.htmlparser.annotation.NsUri;
50 import nu.validator.htmlparser.common.DoctypeExpectation;
51 import nu.validator.htmlparser.common.DocumentMode;
52 import nu.validator.htmlparser.common.DocumentModeHandler;
53 import nu.validator.htmlparser.common.Interner;
54 import nu.validator.htmlparser.common.TokenHandler;
55 import nu.validator.htmlparser.common.XmlViolationPolicy;
57 import org.xml.sax.ErrorHandler;
58 import org.xml.sax.Locator;
59 import org.xml.sax.SAXException;
60 import org.xml.sax.SAXParseException;
62 public abstract class TreeBuilder<T> implements TokenHandler,
63 TreeBuilderState<T> {
/**
 * Array version of U+FFFD (REPLACEMENT CHARACTER): a one-element
 * <code>char</code> array whose only element is that character.
 */
private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
70 // Start dispatch groups
72 final static int OTHER = 0;
74 final static int A = 1;
76 final static int BASE = 2;
78 final static int BODY = 3;
80 final static int BR = 4;
82 final static int BUTTON = 5;
84 final static int CAPTION = 6;
86 final static int COL = 7;
88 final static int COLGROUP = 8;
90 final static int FORM = 9;
92 final static int FRAME = 10;
94 final static int FRAMESET = 11;
96 final static int IMAGE = 12;
98 final static int INPUT = 13;
100 final static int ISINDEX = 14;
102 final static int LI = 15;
104 final static int LINK_OR_BASEFONT_OR_BGSOUND = 16;
106 final static int MATH = 17;
108 final static int META = 18;
110 final static int SVG = 19;
112 final static int HEAD = 20;
114 final static int HR = 22;
116 final static int HTML = 23;
118 final static int NOBR = 24;
120 final static int NOFRAMES = 25;
122 final static int NOSCRIPT = 26;
124 final static int OPTGROUP = 27;
126 final static int OPTION = 28;
128 final static int P = 29;
130 final static int PLAINTEXT = 30;
132 final static int SCRIPT = 31;
134 final static int SELECT = 32;
136 final static int STYLE = 33;
138 final static int TABLE = 34;
140 final static int TEXTAREA = 35;
142 final static int TITLE = 36;
144 final static int TR = 37;
146 final static int XMP = 38;
148 final static int TBODY_OR_THEAD_OR_TFOOT = 39;
150 final static int TD_OR_TH = 40;
152 final static int DD_OR_DT = 41;
154 final static int H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 = 42;
156 final static int MARQUEE_OR_APPLET = 43;
158 final static int PRE_OR_LISTING = 44;
160 final static int B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U = 45;
162 final static int UL_OR_OL_OR_DL = 46;
164 final static int IFRAME = 47;
166 final static int EMBED = 48;
168 final static int AREA_OR_WBR = 49;
170 final static int DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU = 50;
172 final static int ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY = 51;
174 final static int RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR = 52;
176 final static int RT_OR_RP = 53;
178 final static int PARAM_OR_SOURCE_OR_TRACK = 55;
180 final static int MGLYPH_OR_MALIGNMARK = 56;
182 final static int MI_MO_MN_MS_MTEXT = 57;
184 final static int ANNOTATION_XML = 58;
186 final static int FOREIGNOBJECT_OR_DESC = 59;
188 final static int NOEMBED = 60;
190 final static int FIELDSET = 61;
192 final static int OUTPUT_OR_LABEL = 62;
194 final static int OBJECT = 63;
196 final static int FONT = 64;
198 final static int KEYGEN = 65;
200 final static int MENUITEM = 66;
202 final static int TEMPLATE = 67;
204 final static int IMG = 68;
// start insertion modes
//
// Integer pseudo-enum of the values held by the "mode" field and switched
// on by the token-handling methods (for example characters() and
// comment()). FRAMESET_OK is part of this numbering and is used as a case
// label in switches over "mode".
//
// NOTE(review): the "no fall-through" / "could add fall-through" remarks
// presumably describe how the corresponding case labels are laid out in
// the large mode switches elsewhere in this class -- confirm there.

private static final int IN_ROW = 0;

private static final int IN_TABLE_BODY = 1;

private static final int IN_TABLE = 2;

private static final int IN_CAPTION = 3;

private static final int IN_CELL = 4;

private static final int FRAMESET_OK = 5;

private static final int IN_BODY = 6;

private static final int IN_HEAD = 7;

private static final int IN_HEAD_NOSCRIPT = 8;

// no fall-through

private static final int IN_COLUMN_GROUP = 9;

// no fall-through

private static final int IN_SELECT_IN_TABLE = 10;

private static final int IN_SELECT = 11;

// no fall-through

private static final int AFTER_BODY = 12;

// no fall-through

private static final int IN_FRAMESET = 13;

private static final int AFTER_FRAMESET = 14;

// no fall-through

private static final int INITIAL = 15;

// could add fall-through

private static final int BEFORE_HTML = 16;

// could add fall-through

private static final int BEFORE_HEAD = 17;

// no fall-through

private static final int AFTER_HEAD = 18;

// no fall-through

private static final int AFTER_AFTER_BODY = 19;

// no fall-through

private static final int AFTER_AFTER_FRAMESET = 20;

// no fall-through

private static final int TEXT = 21;

private static final int IN_TEMPLATE = 22;

// start charset states
//
// Integer pseudo-enum of states named after the letters of "charset",
// followed by states for the "=" sign and for single-quoted,
// double-quoted and unquoted values.
// NOTE(review): presumably drives a small scanner that extracts a charset
// value from a string; the consumer is not in this part of the file.

private static final int CHARSET_INITIAL = 0;

private static final int CHARSET_C = 1;

private static final int CHARSET_H = 2;

private static final int CHARSET_A = 3;

private static final int CHARSET_R = 4;

private static final int CHARSET_S = 5;

private static final int CHARSET_E = 6;

private static final int CHARSET_T = 7;

private static final int CHARSET_EQUALS = 8;

private static final int CHARSET_SINGLE_QUOTED = 9;

private static final int CHARSET_DOUBLE_QUOTED = 10;

private static final int CHARSET_UNQUOTED = 11;

// end pseudo enums
304 // [NOCPP[
306 private final static String[] HTML4_PUBLIC_IDS = {
307 "-//W3C//DTD HTML 4.0 Frameset//EN",
308 "-//W3C//DTD HTML 4.0 Transitional//EN",
309 "-//W3C//DTD HTML 4.0//EN", "-//W3C//DTD HTML 4.01 Frameset//EN",
310 "-//W3C//DTD HTML 4.01 Transitional//EN",
311 "-//W3C//DTD HTML 4.01//EN" };
313 // ]NOCPP]
315 @Literal private final static String[] QUIRKY_PUBLIC_IDS = {
316 "+//silmaril//dtd html pro v0r11 19970101//",
317 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
318 "-//as//dtd html 3.0 aswedit + extensions//",
319 "-//ietf//dtd html 2.0 level 1//",
320 "-//ietf//dtd html 2.0 level 2//",
321 "-//ietf//dtd html 2.0 strict level 1//",
322 "-//ietf//dtd html 2.0 strict level 2//",
323 "-//ietf//dtd html 2.0 strict//",
324 "-//ietf//dtd html 2.0//",
325 "-//ietf//dtd html 2.1e//",
326 "-//ietf//dtd html 3.0//",
327 "-//ietf//dtd html 3.2 final//",
328 "-//ietf//dtd html 3.2//",
329 "-//ietf//dtd html 3//",
330 "-//ietf//dtd html level 0//",
331 "-//ietf//dtd html level 1//",
332 "-//ietf//dtd html level 2//",
333 "-//ietf//dtd html level 3//",
334 "-//ietf//dtd html strict level 0//",
335 "-//ietf//dtd html strict level 1//",
336 "-//ietf//dtd html strict level 2//",
337 "-//ietf//dtd html strict level 3//",
338 "-//ietf//dtd html strict//",
339 "-//ietf//dtd html//",
340 "-//metrius//dtd metrius presentational//",
341 "-//microsoft//dtd internet explorer 2.0 html strict//",
342 "-//microsoft//dtd internet explorer 2.0 html//",
343 "-//microsoft//dtd internet explorer 2.0 tables//",
344 "-//microsoft//dtd internet explorer 3.0 html strict//",
345 "-//microsoft//dtd internet explorer 3.0 html//",
346 "-//microsoft//dtd internet explorer 3.0 tables//",
347 "-//netscape comm. corp.//dtd html//",
348 "-//netscape comm. corp.//dtd strict html//",
349 "-//o'reilly and associates//dtd html 2.0//",
350 "-//o'reilly and associates//dtd html extended 1.0//",
351 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
352 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
353 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
354 "-//spyglass//dtd html 2.0 extended//",
355 "-//sq//dtd html 2.0 hotmetal + extensions//",
356 "-//sun microsystems corp.//dtd hotjava html//",
357 "-//sun microsystems corp.//dtd hotjava strict html//",
358 "-//w3c//dtd html 3 1995-03-24//", "-//w3c//dtd html 3.2 draft//",
359 "-//w3c//dtd html 3.2 final//", "-//w3c//dtd html 3.2//",
360 "-//w3c//dtd html 3.2s draft//", "-//w3c//dtd html 4.0 frameset//",
361 "-//w3c//dtd html 4.0 transitional//",
362 "-//w3c//dtd html experimental 19960712//",
363 "-//w3c//dtd html experimental 970421//", "-//w3c//dtd w3 html//",
364 "-//w3o//dtd w3 html 3.0//", "-//webtechs//dtd mozilla html 2.0//",
365 "-//webtechs//dtd mozilla html//" };
// Sentinel index equal to Integer.MAX_VALUE.
// NOTE(review): going by its name, returned by stack searches that find
// nothing; those searches are not in this part of the file -- confirm.
private static final int NOT_FOUND_ON_STACK = Integer.MAX_VALUE;

// [NOCPP[

// The local name "html" as a constant.
private static final @Local String HTML_LOCAL = "html";

// ]NOCPP]
// Current insertion mode; holds one of the insertion-mode pseudo-enum
// constants and is switched on by the token-handling methods.
private int mode = INITIAL;

// Reset to INITIAL by startTokenization(); its other uses are outside
// this part of the file.
private int originalMode = INITIAL;

/**
 * Used only when moving back to IN_BODY.
 */
// NOTE(review): the Javadoc above sits on framesetOk, but its wording may
// describe a neighbouring field -- confirm against the uses of both.
// startTokenization() resets this flag to true.
private boolean framesetOk = true;

// Tokenizer feeding this tree builder; assigned in startTokenization()
// and passed as the locator when SAXParseExceptions are created.
protected Tokenizer tokenizer;

// [NOCPP[

// Receiver for errors and warnings; may be null, in which case err() and
// warn() return without reporting anything.
protected ErrorHandler errorHandler;

private DocumentModeHandler documentModeHandler;

// Selects which doctype diagnostics doctype() and characters() emit.
private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;

// Location of the first comment seen since startTokenization(); set in
// comment() and used by doctype() for the "Comments seen before doctype"
// warning.
private LocatorImpl firstCommentLocation;

// ]NOCPP]

private boolean scriptingEnabled = false;

// When true, characters() drops a leading '\n' from the next character
// run; doctype(), comment() and characters() clear the flag.
private boolean needToDropLF;

// [NOCPP[

// Cached result of wantsComments(), refreshed in startTokenization();
// when false, comment() returns without appending anything.
private boolean wantingComments;

// ]NOCPP]

// True when parsing a fragment; startTokenization() then seeds the stack
// with a root node and configures the tokenizer from contextName.
private boolean fragment;

// Local name of the fragment context element; cleared at the end of
// fragment set-up in startTokenization().
private @Local String contextName;

private @NsUri String contextNamespace;

// Fragment context node; when non-null it is used as the root stack
// entry, then cleared at the end of fragment set-up.
private T contextNode;

/**
 * Stack of template insertion modes
 */
private @Auto int[] templateModeStack;

/**
 * Current template mode stack pointer.
 */
private int templateModePtr = -1;

// Stack of open elements; allocated with 64 slots in startTokenization().
private @Auto StackNode<T>[] stack;

// Index of the top entry of "stack"; -1 when the stack is empty.
private int currentPtr = -1;

// Allocated with 64 slots in startTokenization().
private @Auto StackNode<T>[] listOfActiveFormattingElements;

// Reset to -1 together with currentPtr in startTokenization().
private int listPtr = -1;

private T formPointer;

private T headPointer;

/**
 * Used to work around Gecko limitations. Not used in Java.
 */
private T deepTreeSurrogateParent;

// Buffer for pending character data; allocated with 1024 chars in
// startTokenization().
protected @Auto char[] charBuffer;

// Reset to 0 in startTokenization() alongside charBuffer.
protected int charBufferLen = 0;

private boolean quirks = false;

private boolean isSrcdocDocument = false;

// [NOCPP[

// When true, doctype() appends the doctype to the document.
private boolean reportingDoctype = true;

private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET;

// Cleared at the start of every tokenization run.
private final Map<String, LocatorImpl> idLocations = new HashMap<String, LocatorImpl>();

// Set by doctype() when the document is treated as HTML 4; reset to false
// in startTokenization().
private boolean html4;

// ]NOCPP]
/**
 * Creates a tree builder that is not in fragment-parsing mode.
 */
protected TreeBuilder() {
    fragment = false;
}
/**
 * Reports a condition that would make the infoset incompatible with XML
 * 1.0 as fatal.
 *
 * The implementation in this class has an empty body and therefore does
 * nothing.
 *
 * @throws SAXException
 * @throws SAXParseException
 */
protected void fatal() throws SAXException {
}
477 // [NOCPP[
479 protected final void fatal(Exception e) throws SAXException {
480 SAXParseException spe = new SAXParseException(e.getMessage(),
481 tokenizer, e);
482 if (errorHandler != null) {
483 errorHandler.fatalError(spe);
484 }
485 throw spe;
486 }
488 final void fatal(String s) throws SAXException {
489 SAXParseException spe = new SAXParseException(s, tokenizer);
490 if (errorHandler != null) {
491 errorHandler.fatalError(spe);
492 }
493 throw spe;
494 }
496 /**
497 * Reports a Parse Error.
498 *
499 * @param message
500 * the message
501 * @throws SAXException
502 */
503 final void err(String message) throws SAXException {
504 if (errorHandler == null) {
505 return;
506 }
507 errNoCheck(message);
508 }
510 /**
511 * Reports a Parse Error without checking if an error handler is present.
512 *
513 * @param message
514 * the message
515 * @throws SAXException
516 */
517 final void errNoCheck(String message) throws SAXException {
518 SAXParseException spe = new SAXParseException(message, tokenizer);
519 errorHandler.error(spe);
520 }
522 private void errListUnclosedStartTags(int eltPos) throws SAXException {
523 if (currentPtr != -1) {
524 for (int i = currentPtr; i > eltPos; i--) {
525 reportUnclosedElementNameAndLocation(i);
526 }
527 }
528 }
530 /**
531 * Reports the name and location of an unclosed element.
532 *
533 * @throws SAXException
534 */
535 private final void reportUnclosedElementNameAndLocation(int pos) throws SAXException {
536 StackNode<T> node = stack[pos];
537 if (node.isOptionalEndTag()) {
538 return;
539 }
540 TaintableLocatorImpl locator = node.getLocator();
541 if (locator.isTainted()) {
542 return;
543 }
544 locator.markTainted();
545 SAXParseException spe = new SAXParseException(
546 "Unclosed element \u201C" + node.popName + "\u201D.", locator);
547 errorHandler.error(spe);
548 }
550 /**
551 * Reports a warning
552 *
553 * @param message
554 * the message
555 * @throws SAXException
556 */
557 final void warn(String message) throws SAXException {
558 if (errorHandler == null) {
559 return;
560 }
561 SAXParseException spe = new SAXParseException(message, tokenizer);
562 errorHandler.warning(spe);
563 }
565 /**
566 * Reports a warning with an explicit locator
567 *
568 * @param message
569 * the message
570 * @throws SAXException
571 */
572 final void warn(String message, Locator locator) throws SAXException {
573 if (errorHandler == null) {
574 return;
575 }
576 SAXParseException spe = new SAXParseException(message, locator);
577 errorHandler.warning(spe);
578 }
580 // ]NOCPP]
/**
 * Resets the tree builder for a new tokenization run and, when parsing a
 * fragment, seeds the stack with a root node and puts the tokenizer into
 * the state that matches the context element.
 *
 * @param self
 *            the tokenizer that will feed this tree builder
 * @throws SAXException
 *             if a callback invoked during set-up throws
 */
@SuppressWarnings("unchecked") public final void startTokenization(Tokenizer self) throws SAXException {
    tokenizer = self;
    // Fresh stacks with 64 slots each; the generic array creation is the
    // reason for the "unchecked" suppression.
    stack = new StackNode[64];
    templateModeStack = new int[64];
    listOfActiveFormattingElements = new StackNode[64];
    needToDropLF = false;
    originalMode = INITIAL;
    // -1 marks each stack as empty.
    templateModePtr = -1;
    currentPtr = -1;
    listPtr = -1;
    formPointer = null;
    headPointer = null;
    deepTreeSurrogateParent = null;
    // [NOCPP[
    html4 = false;
    idLocations.clear();
    wantingComments = wantsComments();
    firstCommentLocation = null;
    // ]NOCPP]
    // start() is invoked before the character buffer is (re)allocated.
    start(fragment);
    charBufferLen = 0;
    charBuffer = new char[1024];
    framesetOk = true;
    if (fragment) {
        // Root stack entry: the supplied context node if there is one,
        // otherwise a newly created html root element.
        T elt;
        if (contextNode != null) {
            elt = contextNode;
        } else {
            elt = createHtmlElementSetAsRoot(tokenizer.emptyAttributes());
        }
        StackNode<T> node = new StackNode<T>(ElementName.HTML, elt
        // [NOCPP[
                // A locator is attached only when errors can be reported.
                , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
        // ]NOCPP]
        );
        currentPtr++;
        stack[currentPtr] = node;
        // NOTE(review): the == comparisons below are identity checks on
        // contextName; they presumably rely on @Local strings being
        // interned -- confirm before changing them to equals().
        if ("template" == contextName) {
            pushTemplateMode(IN_TEMPLATE);
        }
        resetTheInsertionMode();
        formPointer = getFormPointerForContext(contextNode);
        // Pick the tokenizer state from the context element's name.
        if ("title" == contextName || "textarea" == contextName) {
            tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, contextName);
        } else if ("style" == contextName || "xmp" == contextName
                || "iframe" == contextName || "noembed" == contextName
                || "noframes" == contextName
                || (scriptingEnabled && "noscript" == contextName)) {
            tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, contextName);
        } else if ("plaintext" == contextName) {
            tokenizer.setStateAndEndTagExpectation(Tokenizer.PLAINTEXT, contextName);
        } else if ("script" == contextName) {
            tokenizer.setStateAndEndTagExpectation(Tokenizer.SCRIPT_DATA,
                    contextName);
        } else {
            tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, contextName);
        }
        // The context is consumed by this set-up and cleared afterwards.
        contextName = null;
        contextNode = null;
    } else {
        mode = INITIAL;
        // If we are viewing XML source, put a foreign element permanently
        // on the stack so that cdataSectionAllowed() returns true.
        // CPPONLY: if (tokenizer.isViewingXmlSource()) {
        // CPPONLY: T elt = createElement("http://www.w3.org/2000/svg",
        // CPPONLY: "svg",
        // CPPONLY: tokenizer.emptyAttributes());
        // CPPONLY: StackNode<T> node = new StackNode<T>(ElementName.SVG,
        // CPPONLY: "svg",
        // CPPONLY: elt);
        // CPPONLY: currentPtr++;
        // CPPONLY: stack[currentPtr] = node;
        // CPPONLY: }
    }
}
658 public final void doctype(@Local String name, String publicIdentifier,
659 String systemIdentifier, boolean forceQuirks) throws SAXException {
660 needToDropLF = false;
661 if (!isInForeign() && mode == INITIAL) {
662 // [NOCPP[
663 if (reportingDoctype) {
664 // ]NOCPP]
665 String emptyString = Portability.newEmptyString();
666 appendDoctypeToDocument(name == null ? "" : name,
667 publicIdentifier == null ? emptyString
668 : publicIdentifier,
669 systemIdentifier == null ? emptyString
670 : systemIdentifier);
671 Portability.releaseString(emptyString);
672 // [NOCPP[
673 }
674 switch (doctypeExpectation) {
675 case HTML:
676 // ]NOCPP]
677 if (isQuirky(name, publicIdentifier, systemIdentifier,
678 forceQuirks)) {
679 errQuirkyDoctype();
680 documentModeInternal(DocumentMode.QUIRKS_MODE,
681 publicIdentifier, systemIdentifier, false);
682 } else if (isAlmostStandards(publicIdentifier,
683 systemIdentifier)) {
684 // [NOCPP[
685 if (firstCommentLocation != null) {
686 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
687 firstCommentLocation);
688 }
689 // ]NOCPP]
690 errAlmostStandardsDoctype();
691 documentModeInternal(
692 DocumentMode.ALMOST_STANDARDS_MODE,
693 publicIdentifier, systemIdentifier, false);
694 } else {
695 // [NOCPP[
696 if (firstCommentLocation != null) {
697 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
698 firstCommentLocation);
699 }
700 if ((Portability.literalEqualsString(
701 "-//W3C//DTD HTML 4.0//EN", publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString(
702 "http://www.w3.org/TR/REC-html40/strict.dtd",
703 systemIdentifier)))
704 || (Portability.literalEqualsString(
705 "-//W3C//DTD HTML 4.01//EN",
706 publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString(
707 "http://www.w3.org/TR/html4/strict.dtd",
708 systemIdentifier)))
709 || (Portability.literalEqualsString(
710 "-//W3C//DTD XHTML 1.0 Strict//EN",
711 publicIdentifier) && Portability.literalEqualsString(
712 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd",
713 systemIdentifier))
714 || (Portability.literalEqualsString(
715 "-//W3C//DTD XHTML 1.1//EN",
716 publicIdentifier) && Portability.literalEqualsString(
717 "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd",
718 systemIdentifier))
720 ) {
721 warn("Obsolete doctype. Expected \u201C<!DOCTYPE html>\u201D.");
722 } else if (!((systemIdentifier == null || Portability.literalEqualsString(
723 "about:legacy-compat", systemIdentifier)) && publicIdentifier == null)) {
724 err("Legacy doctype. Expected \u201C<!DOCTYPE html>\u201D.");
725 }
726 // ]NOCPP]
727 documentModeInternal(DocumentMode.STANDARDS_MODE,
728 publicIdentifier, systemIdentifier, false);
729 }
730 // [NOCPP[
731 break;
732 case HTML401_STRICT:
733 html4 = true;
734 tokenizer.turnOnAdditionalHtml4Errors();
735 if (isQuirky(name, publicIdentifier, systemIdentifier,
736 forceQuirks)) {
737 err("Quirky doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
738 documentModeInternal(DocumentMode.QUIRKS_MODE,
739 publicIdentifier, systemIdentifier, true);
740 } else if (isAlmostStandards(publicIdentifier,
741 systemIdentifier)) {
742 if (firstCommentLocation != null) {
743 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
744 firstCommentLocation);
745 }
746 err("Almost standards mode doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
747 documentModeInternal(
748 DocumentMode.ALMOST_STANDARDS_MODE,
749 publicIdentifier, systemIdentifier, true);
750 } else {
751 if (firstCommentLocation != null) {
752 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
753 firstCommentLocation);
754 }
755 if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
756 if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
757 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
758 }
759 } else {
760 err("The doctype was not the HTML 4.01 Strict doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
761 }
762 documentModeInternal(DocumentMode.STANDARDS_MODE,
763 publicIdentifier, systemIdentifier, true);
764 }
765 break;
766 case HTML401_TRANSITIONAL:
767 html4 = true;
768 tokenizer.turnOnAdditionalHtml4Errors();
769 if (isQuirky(name, publicIdentifier, systemIdentifier,
770 forceQuirks)) {
771 err("Quirky doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
772 documentModeInternal(DocumentMode.QUIRKS_MODE,
773 publicIdentifier, systemIdentifier, true);
774 } else if (isAlmostStandards(publicIdentifier,
775 systemIdentifier)) {
776 if (firstCommentLocation != null) {
777 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
778 firstCommentLocation);
779 }
780 if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)
781 && systemIdentifier != null) {
782 if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
783 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
784 }
785 } else {
786 err("The doctype was not a non-quirky HTML 4.01 Transitional doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
787 }
788 documentModeInternal(
789 DocumentMode.ALMOST_STANDARDS_MODE,
790 publicIdentifier, systemIdentifier, true);
791 } else {
792 if (firstCommentLocation != null) {
793 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
794 firstCommentLocation);
795 }
796 err("The doctype was not the HTML 4.01 Transitional doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
797 documentModeInternal(DocumentMode.STANDARDS_MODE,
798 publicIdentifier, systemIdentifier, true);
799 }
800 break;
801 case AUTO:
802 html4 = isHtml4Doctype(publicIdentifier);
803 if (html4) {
804 tokenizer.turnOnAdditionalHtml4Errors();
805 }
806 if (isQuirky(name, publicIdentifier, systemIdentifier,
807 forceQuirks)) {
808 err("Quirky doctype. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
809 documentModeInternal(DocumentMode.QUIRKS_MODE,
810 publicIdentifier, systemIdentifier, html4);
811 } else if (isAlmostStandards(publicIdentifier,
812 systemIdentifier)) {
813 if (firstCommentLocation != null) {
814 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
815 firstCommentLocation);
816 }
817 if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) {
818 if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) {
819 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
820 }
821 } else {
822 err("Almost standards mode doctype. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
823 }
824 documentModeInternal(
825 DocumentMode.ALMOST_STANDARDS_MODE,
826 publicIdentifier, systemIdentifier, html4);
827 } else {
828 if (firstCommentLocation != null) {
829 warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.",
830 firstCommentLocation);
831 }
832 if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) {
833 if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) {
834 warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
835 }
836 } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) {
837 if (!"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".equals(systemIdentifier)) {
838 warn("The doctype did not contain the system identifier prescribed by the XHTML 1.0 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\u201D.");
839 }
840 } else if ("//W3C//DTD XHTML 1.1//EN".equals(publicIdentifier)) {
841 if (!"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd".equals(systemIdentifier)) {
842 warn("The doctype did not contain the system identifier prescribed by the XHTML 1.1 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\u201D.");
843 }
844 } else if (!((systemIdentifier == null || Portability.literalEqualsString(
845 "about:legacy-compat", systemIdentifier)) && publicIdentifier == null)) {
846 err("Unexpected doctype. Expected, e.g., \u201C<!DOCTYPE html>\u201D.");
847 }
848 documentModeInternal(DocumentMode.STANDARDS_MODE,
849 publicIdentifier, systemIdentifier, html4);
850 }
851 break;
852 case NO_DOCTYPE_ERRORS:
853 if (isQuirky(name, publicIdentifier, systemIdentifier,
854 forceQuirks)) {
855 documentModeInternal(DocumentMode.QUIRKS_MODE,
856 publicIdentifier, systemIdentifier, false);
857 } else if (isAlmostStandards(publicIdentifier,
858 systemIdentifier)) {
859 documentModeInternal(
860 DocumentMode.ALMOST_STANDARDS_MODE,
861 publicIdentifier, systemIdentifier, false);
862 } else {
863 documentModeInternal(DocumentMode.STANDARDS_MODE,
864 publicIdentifier, systemIdentifier, false);
865 }
866 break;
867 }
868 // ]NOCPP]
870 /*
871 *
872 * Then, switch to the root element mode of the tree construction
873 * stage.
874 */
875 mode = BEFORE_HTML;
876 return;
877 }
878 /*
879 * A DOCTYPE token Parse error.
880 */
881 errStrayDoctype();
882 /*
883 * Ignore the token.
884 */
885 return;
886 }
888 // [NOCPP[
890 private boolean isHtml4Doctype(String publicIdentifier) {
891 if (publicIdentifier != null
892 && (Arrays.binarySearch(TreeBuilder.HTML4_PUBLIC_IDS,
893 publicIdentifier) > -1)) {
894 return true;
895 }
896 return false;
897 }
899 // ]NOCPP]
901 public final void comment(@NoLength char[] buf, int start, int length)
902 throws SAXException {
903 needToDropLF = false;
904 // [NOCPP[
905 if (firstCommentLocation == null) {
906 firstCommentLocation = new LocatorImpl(tokenizer);
907 }
908 if (!wantingComments) {
909 return;
910 }
911 // ]NOCPP]
912 if (!isInForeign()) {
913 switch (mode) {
914 case INITIAL:
915 case BEFORE_HTML:
916 case AFTER_AFTER_BODY:
917 case AFTER_AFTER_FRAMESET:
918 /*
919 * A comment token Append a Comment node to the Document
920 * object with the data attribute set to the data given in
921 * the comment token.
922 */
923 appendCommentToDocument(buf, start, length);
924 return;
925 case AFTER_BODY:
926 /*
927 * A comment token Append a Comment node to the first
928 * element in the stack of open elements (the html element),
929 * with the data attribute set to the data given in the
930 * comment token.
931 */
932 flushCharacters();
933 appendComment(stack[0].node, buf, start, length);
934 return;
935 default:
936 break;
937 }
938 }
939 /*
940 * A comment token Append a Comment node to the current node with the
941 * data attribute set to the data given in the comment token.
942 */
943 flushCharacters();
944 appendComment(stack[currentPtr].node, buf, start, length);
945 return;
946 }
948 /**
949 * @see nu.validator.htmlparser.common.TokenHandler#characters(char[], int,
950 * int)
951 */
952 public final void characters(@Const @NoLength char[] buf, int start, int length)
953 throws SAXException {
954 // Note: Can't attach error messages to EOF in C++ yet
956 // CPPONLY: if (tokenizer.isViewingXmlSource()) {
957 // CPPONLY: return;
958 // CPPONLY: }
959 if (needToDropLF) {
960 needToDropLF = false;
961 if (buf[start] == '\n') {
962 start++;
963 length--;
964 if (length == 0) {
965 return;
966 }
967 }
968 }
970 // optimize the most common case
971 switch (mode) {
972 case IN_BODY:
973 case IN_CELL:
974 case IN_CAPTION:
975 if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
976 reconstructTheActiveFormattingElements();
977 }
978 // fall through
979 case TEXT:
980 accumulateCharacters(buf, start, length);
981 return;
982 case IN_TABLE:
983 case IN_TABLE_BODY:
984 case IN_ROW:
985 accumulateCharactersForced(buf, start, length);
986 return;
987 default:
988 int end = start + length;
989 charactersloop: for (int i = start; i < end; i++) {
990 switch (buf[i]) {
991 case ' ':
992 case '\t':
993 case '\n':
994 case '\r':
995 case '\u000C':
996 /*
997 * A character token that is one of one of U+0009
998 * CHARACTER TABULATION, U+000A LINE FEED (LF),
999 * U+000C FORM FEED (FF), or U+0020 SPACE
1000 */
1001 switch (mode) {
1002 case INITIAL:
1003 case BEFORE_HTML:
1004 case BEFORE_HEAD:
1005 /*
1006 * Ignore the token.
1007 */
1008 start = i + 1;
1009 continue;
1010 case IN_HEAD:
1011 case IN_HEAD_NOSCRIPT:
1012 case AFTER_HEAD:
1013 case IN_COLUMN_GROUP:
1014 case IN_FRAMESET:
1015 case AFTER_FRAMESET:
1016 /*
1017 * Append the character to the current node.
1018 */
1019 continue;
1020 case FRAMESET_OK:
1021 case IN_TEMPLATE:
1022 case IN_BODY:
1023 case IN_CELL:
1024 case IN_CAPTION:
1025 if (start < i) {
1026 accumulateCharacters(buf, start, i
1027 - start);
1028 start = i;
1029 }
1031 /*
1032 * Reconstruct the active formatting
1033 * elements, if any.
1034 */
1035 if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
1036 flushCharacters();
1037 reconstructTheActiveFormattingElements();
1038 }
1039 /*
1040 * Append the token's character to the
1041 * current node.
1042 */
1043 break charactersloop;
1044 case IN_SELECT:
1045 case IN_SELECT_IN_TABLE:
1046 break charactersloop;
1047 case IN_TABLE:
1048 case IN_TABLE_BODY:
1049 case IN_ROW:
1050 accumulateCharactersForced(buf, i, 1);
1051 start = i + 1;
1052 continue;
1053 case AFTER_BODY:
1054 case AFTER_AFTER_BODY:
1055 case AFTER_AFTER_FRAMESET:
1056 if (start < i) {
1057 accumulateCharacters(buf, start, i
1058 - start);
1059 start = i;
1060 }
1061 /*
1062 * Reconstruct the active formatting
1063 * elements, if any.
1064 */
1065 flushCharacters();
1066 reconstructTheActiveFormattingElements();
1067 /*
1068 * Append the token's character to the
1069 * current node.
1070 */
1071 continue;
1072 }
1073 default:
1074 /*
1075 * A character token that is not one of one of
1076 * U+0009 CHARACTER TABULATION, U+000A LINE FEED
1077 * (LF), U+000C FORM FEED (FF), or U+0020 SPACE
1078 */
1079 switch (mode) {
1080 case INITIAL:
1081 /*
1082 * Parse error.
1083 */
1084 // [NOCPP[
1085 switch (doctypeExpectation) {
1086 case AUTO:
1087 err("Non-space characters found without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
1088 break;
1089 case HTML:
1090 // XXX figure out a way to report this in the Gecko View Source case
1091 err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
1092 break;
1093 case HTML401_STRICT:
1094 err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
1095 break;
1096 case HTML401_TRANSITIONAL:
1097 err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
1098 break;
1099 case NO_DOCTYPE_ERRORS:
1100 }
1101 // ]NOCPP]
1102 /*
1103 *
1104 * Set the document to quirks mode.
1105 */
1106 documentModeInternal(
1107 DocumentMode.QUIRKS_MODE, null,
1108 null, false);
1109 /*
1110 * Then, switch to the root element mode of
1111 * the tree construction stage
1112 */
1113 mode = BEFORE_HTML;
1114 /*
1115 * and reprocess the current token.
1116 */
1117 i--;
1118 continue;
1119 case BEFORE_HTML:
1120 /*
1121 * Create an HTMLElement node with the tag
1122 * name html, in the HTML namespace. Append
1123 * it to the Document object.
1124 */
1125 // No need to flush characters here,
1126 // because there's nothing to flush.
1127 appendHtmlElementToDocumentAndPush();
1128 /* Switch to the main mode */
1129 mode = BEFORE_HEAD;
1130 /*
1131 * reprocess the current token.
1132 */
1133 i--;
1134 continue;
1135 case BEFORE_HEAD:
1136 if (start < i) {
1137 accumulateCharacters(buf, start, i
1138 - start);
1139 start = i;
1140 }
1141 /*
1142 * Act as if a start tag token with the tag
1143 * name "head" and no attributes had been
1144 * seen,
1145 */
1146 flushCharacters();
1147 appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES);
1148 mode = IN_HEAD;
1149 /*
1150 * then reprocess the current token.
1151 *
1152 * This will result in an empty head element
1153 * being generated, with the current token
1154 * being reprocessed in the "after head"
1155 * insertion mode.
1156 */
1157 i--;
1158 continue;
1159 case IN_HEAD:
1160 if (start < i) {
1161 accumulateCharacters(buf, start, i
1162 - start);
1163 start = i;
1164 }
1165 /*
1166 * Act as if an end tag token with the tag
1167 * name "head" had been seen,
1168 */
1169 flushCharacters();
1170 pop();
1171 mode = AFTER_HEAD;
1172 /*
1173 * and reprocess the current token.
1174 */
1175 i--;
1176 continue;
1177 case IN_HEAD_NOSCRIPT:
1178 if (start < i) {
1179 accumulateCharacters(buf, start, i
1180 - start);
1181 start = i;
1182 }
1183 /*
1184 * Parse error. Act as if an end tag with
1185 * the tag name "noscript" had been seen
1186 */
1187 errNonSpaceInNoscriptInHead();
1188 flushCharacters();
1189 pop();
1190 mode = IN_HEAD;
1191 /*
1192 * and reprocess the current token.
1193 */
1194 i--;
1195 continue;
1196 case AFTER_HEAD:
1197 if (start < i) {
1198 accumulateCharacters(buf, start, i
1199 - start);
1200 start = i;
1201 }
1202 /*
1203 * Act as if a start tag token with the tag
1204 * name "body" and no attributes had been
1205 * seen,
1206 */
1207 flushCharacters();
1208 appendToCurrentNodeAndPushBodyElement();
1209 mode = FRAMESET_OK;
1210 /*
1211 * and then reprocess the current token.
1212 */
1213 i--;
1214 continue;
1215 case FRAMESET_OK:
1216 framesetOk = false;
1217 mode = IN_BODY;
1218 i--;
1219 continue;
1220 case IN_TEMPLATE:
1221 case IN_BODY:
1222 case IN_CELL:
1223 case IN_CAPTION:
1224 if (start < i) {
1225 accumulateCharacters(buf, start, i
1226 - start);
1227 start = i;
1228 }
1229 /*
1230 * Reconstruct the active formatting
1231 * elements, if any.
1232 */
1233 if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) {
1234 flushCharacters();
1235 reconstructTheActiveFormattingElements();
1236 }
1237 /*
1238 * Append the token's character to the
1239 * current node.
1240 */
1241 break charactersloop;
1242 case IN_TABLE:
1243 case IN_TABLE_BODY:
1244 case IN_ROW:
1245 accumulateCharactersForced(buf, i, 1);
1246 start = i + 1;
1247 continue;
1248 case IN_COLUMN_GROUP:
1249 if (start < i) {
1250 accumulateCharacters(buf, start, i
1251 - start);
1252 start = i;
1253 }
1254 /*
1255 * Act as if an end tag with the tag name
1256 * "colgroup" had been seen, and then, if
1257 * that token wasn't ignored, reprocess the
1258 * current token.
1259 */
1260 if (currentPtr == 0 || stack[currentPtr].getGroup() ==
1261 TreeBuilder.TEMPLATE) {
1262 errNonSpaceInColgroupInFragment();
1263 start = i + 1;
1264 continue;
1265 }
1266 flushCharacters();
1267 pop();
1268 mode = IN_TABLE;
1269 i--;
1270 continue;
1271 case IN_SELECT:
1272 case IN_SELECT_IN_TABLE:
1273 break charactersloop;
1274 case AFTER_BODY:
1275 errNonSpaceAfterBody();
1276 fatal();
1277 mode = framesetOk ? FRAMESET_OK : IN_BODY;
1278 i--;
1279 continue;
1280 case IN_FRAMESET:
1281 if (start < i) {
1282 accumulateCharacters(buf, start, i
1283 - start);
1284 start = i;
1285 }
1286 /*
1287 * Parse error.
1288 */
1289 errNonSpaceInFrameset();
1290 /*
1291 * Ignore the token.
1292 */
1293 start = i + 1;
1294 continue;
1295 case AFTER_FRAMESET:
1296 if (start < i) {
1297 accumulateCharacters(buf, start, i
1298 - start);
1299 start = i;
1300 }
1301 /*
1302 * Parse error.
1303 */
1304 errNonSpaceAfterFrameset();
1305 /*
1306 * Ignore the token.
1307 */
1308 start = i + 1;
1309 continue;
1310 case AFTER_AFTER_BODY:
1311 /*
1312 * Parse error.
1313 */
1314 errNonSpaceInTrailer();
1315 /*
1316 * Switch back to the main mode and
1317 * reprocess the token.
1318 */
1319 mode = framesetOk ? FRAMESET_OK : IN_BODY;
1320 i--;
1321 continue;
1322 case AFTER_AFTER_FRAMESET:
1323 errNonSpaceInTrailer();
1324 /*
1325 * Switch back to the main mode and
1326 * reprocess the token.
1327 */
1328 mode = IN_FRAMESET;
1329 i--;
1330 continue;
1331 }
1332 }
1333 }
1334 if (start < end) {
1335 accumulateCharacters(buf, start, end - start);
1336 }
1337 }
1338 }
1340 /**
1341 * @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter()
1342 */
1343 public void zeroOriginatingReplacementCharacter() throws SAXException {
1344 if (mode == TEXT) {
1345 accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1);
1346 return;
1347 }
1348 if (currentPtr >= 0) {
1349 if (isSpecialParentInForeign(stack[currentPtr])) {
1350 return;
1351 }
1352 accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1);
1353 }
1354 }
/**
 * Handles end of input by stepping the insertion-mode state machine until
 * one of the cases leaves the loop, then pops what remains on the stack.
 *
 * Pending characters are flushed first. Inside the loop every case either
 * changes {@code mode} and uses {@code continue} so that end-of-file is
 * processed again in the new mode, or exits through {@code break eofloop}.
 * After the loop the stack is popped down to its bottom entry, and that
 * entry is popped as well unless {@code fragment} is set.
 *
 * The {@code [NOCPP[} / {@code ]NOCPP]} comment pairs are kept exactly as
 * they were (NOTE(review): they appear to delimit Java-only regions for the
 * C++ translation -- confirm before moving code across them).
 *
 * @throws SAXException declared for the helpers invoked here
 */
1356 public final void eof() throws SAXException {
1357 flushCharacters();
1358 // Note: Can't attach error messages to EOF in C++ yet
// Each iteration handles end-of-file in the current mode; "continue"
// reprocesses it after a mode switch, "break eofloop" ends the handling.
1359 eofloop: for (;;) {
1360 switch (mode) {
1361 case INITIAL:
1362 /*
1363 * Parse error.
1364 */
1365 // [NOCPP[
1366 switch (doctypeExpectation) {
1367 case AUTO:
1368 err("End of file seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
1369 break;
1370 case HTML:
1371 err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
1372 break;
1373 case HTML401_STRICT:
1374 err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
1375 break;
1376 case HTML401_TRANSITIONAL:
1377 err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
1378 break;
1379 case NO_DOCTYPE_ERRORS:
1380 }
1381 // ]NOCPP]
1382 /*
1383 *
1384 * Set the document to quirks mode.
1385 */
1386 documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
1387 false);
1388 /*
1389 * Then, switch to the root element mode of the tree
1390 * construction stage
1391 */
1392 mode = BEFORE_HTML;
1393 /*
1394 * and reprocess the current token.
1395 */
1396 continue;
1397 case BEFORE_HTML:
1398 /*
1399 * Create an HTMLElement node with the tag name html, in the
1400 * HTML namespace. Append it to the Document object.
1401 */
1402 appendHtmlElementToDocumentAndPush();
1403 // XXX application cache manifest
1404 /* Switch to the main mode */
1405 mode = BEFORE_HEAD;
1406 /*
1407 * reprocess the current token.
1408 */
1409 continue;
1410 case BEFORE_HEAD:
// Push a head element with no attributes, then reprocess in IN_HEAD.
1411 appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES);
1412 mode = IN_HEAD;
1413 continue;
1414 case IN_HEAD:
1415 // [NOCPP[
// Report only when something above stack index 1 is still open
// (presumably indices 0 and 1 hold html and head -- confirm).
1416 if (errorHandler != null && currentPtr > 1) {
1417 errEofWithUnclosedElements();
1418 }
1419 // ]NOCPP]
// Pop everything except the bottom stack entry.
1420 while (currentPtr > 0) {
1421 popOnEof();
1422 }
1423 mode = AFTER_HEAD;
1424 continue;
1425 case IN_HEAD_NOSCRIPT:
1426 // [NOCPP[
1427 errEofWithUnclosedElements();
1428 // ]NOCPP]
// Pop down to stack index 1 and let the IN_HEAD case do the rest.
1429 while (currentPtr > 1) {
1430 popOnEof();
1431 }
1432 mode = IN_HEAD;
1433 continue;
1434 case AFTER_HEAD:
// Push a body element, then reprocess in IN_BODY.
1435 appendToCurrentNodeAndPushBodyElement();
1436 mode = IN_BODY;
1437 continue;
1438 case IN_TABLE_BODY:
1439 case IN_ROW:
1440 case IN_TABLE:
1441 case IN_SELECT_IN_TABLE:
1442 case IN_SELECT:
1443 case IN_COLUMN_GROUP:
1444 case FRAMESET_OK:
1445 case IN_CAPTION:
1446 case IN_CELL:
1447 case IN_BODY:
1448 // [NOCPP[
// Scan the stack from the top. The first element whose group is not one
// of those listed below triggers a single error report and ends the scan.
1449 openelementloop: for (int i = currentPtr; i >= 0; i--) {
1450 int group = stack[i].getGroup();
1451 switch (group) {
1452 case DD_OR_DT:
1453 case LI:
1454 case P:
1455 case TBODY_OR_THEAD_OR_TFOOT:
1456 case TD_OR_TH:
1457 case BODY:
1458 case HTML:
1459 break;
1460 default:
1461 errEofWithUnclosedElements();
1462 break openelementloop;
1463 }
1464 }
1465 // ]NOCPP]
// With no template insertion modes pending, end-of-file handling is done;
// otherwise execution continues into the IN_TEMPLATE case.
1467 if (isTemplateModeStackEmpty()) {
1468 break eofloop;
1469 }
1471 // fall through to IN_TEMPLATE
1472 case IN_TEMPLATE:
1473 int eltPos = findLast("template");
1474 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
1475 assert fragment;
1476 break eofloop;
1477 }
1478 if (errorHandler != null) {
1479 errUnclosedElements(eltPos, "template");
1480 }
// Pop the stack through the "template" element found above, then clear the
// formatting list up to the last marker, drop the template mode, and
// recompute the insertion mode before reprocessing.
1481 while (currentPtr >= eltPos) {
1482 pop();
1483 }
1484 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
1485 popTemplateMode();
1486 resetTheInsertionMode();
1488 // Reprocess token.
1489 continue;
1490 case TEXT:
1491 // [NOCPP[
1492 if (errorHandler != null) {
1493 errNoCheck("End of file seen when expecting text or an end tag.");
1494 errListUnclosedStartTags(0);
1495 }
1496 // ]NOCPP]
1497 // XXX mark script as already executed
// popOnEof runs once, or twice when the saved mode is AFTER_HEAD
// (NOTE(review): the reason for the extra pop is not visible here --
// confirm against the start tag handling), then the saved mode resumes.
1498 if (originalMode == AFTER_HEAD) {
1499 popOnEof();
1500 }
1501 popOnEof();
1502 mode = originalMode;
1503 continue;
1504 case IN_FRAMESET:
1505 // [NOCPP[
1506 if (errorHandler != null && currentPtr > 0) {
1507 errEofWithUnclosedElements();
1508 }
1509 // ]NOCPP]
1510 break eofloop;
1511 case AFTER_BODY:
1512 case AFTER_FRAMESET:
1513 case AFTER_AFTER_BODY:
1514 case AFTER_AFTER_FRAMESET:
1515 default:
1516 // [NOCPP[
1517 if (currentPtr == 0) { // This silliness is here to poison
1518 // buggy compiler optimizations in
1519 // GWT
1520 System.currentTimeMillis();
1521 }
1522 // ]NOCPP]
1523 break eofloop;
1524 }
1525 }
// Pop everything above the bottom stack entry; the bottom entry itself is
// popped only when this is not a fragment parse.
1526 while (currentPtr > 0) {
1527 popOnEof();
1528 }
1529 if (!fragment) {
1530 popOnEof();
1531 }
1532 /* Stop parsing. */
1533 }
1535 /**
1536 * @see nu.validator.htmlparser.common.TokenHandler#endTokenization()
1537 */
1538 public final void endTokenization() throws SAXException {
1539 formPointer = null;
1540 headPointer = null;
1541 deepTreeSurrogateParent = null;
1542 templateModeStack = null;
1543 if (stack != null) {
1544 while (currentPtr > -1) {
1545 stack[currentPtr].release();
1546 currentPtr--;
1547 }
1548 stack = null;
1549 }
1550 if (listOfActiveFormattingElements != null) {
1551 while (listPtr > -1) {
1552 if (listOfActiveFormattingElements[listPtr] != null) {
1553 listOfActiveFormattingElements[listPtr].release();
1554 }
1555 listPtr--;
1556 }
1557 listOfActiveFormattingElements = null;
1558 }
1559 // [NOCPP[
1560 idLocations.clear();
1561 // ]NOCPP]
1562 charBuffer = null;
1563 end();
1564 }
1566 public final void startTag(ElementName elementName,
1567 HtmlAttributes attributes, boolean selfClosing) throws SAXException {
1568 flushCharacters();
1570 // [NOCPP[
1571 if (errorHandler != null) {
1572 // ID uniqueness
1573 @IdType String id = attributes.getId();
1574 if (id != null) {
1575 LocatorImpl oldLoc = idLocations.get(id);
1576 if (oldLoc != null) {
1577 err("Duplicate ID \u201C" + id + "\u201D.");
1578 errorHandler.warning(new SAXParseException(
1579 "The first occurrence of ID \u201C" + id
1580 + "\u201D was here.", oldLoc));
1581 } else {
1582 idLocations.put(id, new LocatorImpl(tokenizer));
1583 }
1584 }
1585 }
1586 // ]NOCPP]
1588 int eltPos;
1589 needToDropLF = false;
1590 starttagloop: for (;;) {
1591 int group = elementName.getGroup();
1592 @Local String name = elementName.name;
1593 if (isInForeign()) {
1594 StackNode<T> currentNode = stack[currentPtr];
1595 @NsUri String currNs = currentNode.ns;
1596 if (!(currentNode.isHtmlIntegrationPoint() || (currNs == "http://www.w3.org/1998/Math/MathML" && ((currentNode.getGroup() == MI_MO_MN_MS_MTEXT && group != MGLYPH_OR_MALIGNMARK) || (currentNode.getGroup() == ANNOTATION_XML && group == SVG))))) {
1597 switch (group) {
1598 case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
1599 case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
1600 case BODY:
1601 case BR:
1602 case RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR:
1603 case DD_OR_DT:
1604 case UL_OR_OL_OR_DL:
1605 case EMBED:
1606 case IMG:
1607 case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
1608 case HEAD:
1609 case HR:
1610 case LI:
1611 case META:
1612 case NOBR:
1613 case P:
1614 case PRE_OR_LISTING:
1615 case TABLE:
1616 errHtmlStartTagInForeignContext(name);
1617 while (!isSpecialParentInForeign(stack[currentPtr])) {
1618 pop();
1619 }
1620 continue starttagloop;
1621 case FONT:
1622 if (attributes.contains(AttributeName.COLOR)
1623 || attributes.contains(AttributeName.FACE)
1624 || attributes.contains(AttributeName.SIZE)) {
1625 errHtmlStartTagInForeignContext(name);
1626 while (!isSpecialParentInForeign(stack[currentPtr])) {
1627 pop();
1628 }
1629 continue starttagloop;
1630 }
1631 // else fall thru
1632 default:
1633 if ("http://www.w3.org/2000/svg" == currNs) {
1634 attributes.adjustForSvg();
1635 if (selfClosing) {
1636 appendVoidElementToCurrentMayFosterSVG(
1637 elementName, attributes);
1638 selfClosing = false;
1639 } else {
1640 appendToCurrentNodeAndPushElementMayFosterSVG(
1641 elementName, attributes);
1642 }
1643 attributes = null; // CPP
1644 break starttagloop;
1645 } else {
1646 attributes.adjustForMath();
1647 if (selfClosing) {
1648 appendVoidElementToCurrentMayFosterMathML(
1649 elementName, attributes);
1650 selfClosing = false;
1651 } else {
1652 appendToCurrentNodeAndPushElementMayFosterMathML(
1653 elementName, attributes);
1654 }
1655 attributes = null; // CPP
1656 break starttagloop;
1657 }
1658 } // switch
1659 } // foreignObject / annotation-xml
1660 }
1661 switch (mode) {
1662 case IN_TEMPLATE:
1663 switch (group) {
1664 case COL:
1665 popTemplateMode();
1666 pushTemplateMode(IN_COLUMN_GROUP);
1667 mode = IN_COLUMN_GROUP;
1668 // Reprocess token.
1669 continue;
1670 case CAPTION:
1671 case COLGROUP:
1672 case TBODY_OR_THEAD_OR_TFOOT:
1673 popTemplateMode();
1674 pushTemplateMode(IN_TABLE);
1675 mode = IN_TABLE;
1676 // Reprocess token.
1677 continue;
1678 case TR:
1679 popTemplateMode();
1680 pushTemplateMode(IN_TABLE_BODY);
1681 mode = IN_TABLE_BODY;
1682 // Reprocess token.
1683 continue;
1684 case TD_OR_TH:
1685 popTemplateMode();
1686 pushTemplateMode(IN_ROW);
1687 mode = IN_ROW;
1688 // Reprocess token.
1689 continue;
1690 case META:
1691 checkMetaCharset(attributes);
1692 appendVoidElementToCurrentMayFoster(
1693 elementName,
1694 attributes);
1695 selfClosing = false;
1696 attributes = null; // CPP
1697 break starttagloop;
1698 case TITLE:
1699 startTagTitleInHead(elementName, attributes);
1700 attributes = null; // CPP
1701 break starttagloop;
1702 case BASE:
1703 case LINK_OR_BASEFONT_OR_BGSOUND:
1704 appendVoidElementToCurrentMayFoster(
1705 elementName,
1706 attributes);
1707 selfClosing = false;
1708 attributes = null; // CPP
1709 break starttagloop;
1710 case SCRIPT:
1711 startTagScriptInHead(elementName, attributes);
1712 attributes = null; // CPP
1713 break starttagloop;
1714 case NOFRAMES:
1715 case STYLE:
1716 startTagGenericRawText(elementName, attributes);
1717 attributes = null; // CPP
1718 break starttagloop;
1719 case TEMPLATE:
1720 startTagTemplateInHead(elementName, attributes);
1721 attributes = null; // CPP
1722 break starttagloop;
1723 default:
1724 popTemplateMode();
1725 pushTemplateMode(IN_BODY);
1726 mode = IN_BODY;
1727 // Reprocess token.
1728 continue;
1729 }
1730 case IN_ROW:
1731 switch (group) {
1732 case TD_OR_TH:
1733 clearStackBackTo(findLastOrRoot(TreeBuilder.TR));
1734 appendToCurrentNodeAndPushElement(
1735 elementName,
1736 attributes);
1737 mode = IN_CELL;
1738 insertMarker();
1739 attributes = null; // CPP
1740 break starttagloop;
1741 case CAPTION:
1742 case COL:
1743 case COLGROUP:
1744 case TBODY_OR_THEAD_OR_TFOOT:
1745 case TR:
1746 eltPos = findLastOrRoot(TreeBuilder.TR);
1747 if (eltPos == 0) {
1748 assert fragment || isTemplateContents();
1749 errNoTableRowToClose();
1750 break starttagloop;
1751 }
1752 clearStackBackTo(eltPos);
1753 pop();
1754 mode = IN_TABLE_BODY;
1755 continue;
1756 default:
1757 // fall through to IN_TABLE
1758 }
1759 case IN_TABLE_BODY:
1760 switch (group) {
1761 case TR:
1762 clearStackBackTo(findLastInTableScopeOrRootTemplateTbodyTheadTfoot());
1763 appendToCurrentNodeAndPushElement(
1764 elementName,
1765 attributes);
1766 mode = IN_ROW;
1767 attributes = null; // CPP
1768 break starttagloop;
1769 case TD_OR_TH:
1770 errStartTagInTableBody(name);
1771 clearStackBackTo(findLastInTableScopeOrRootTemplateTbodyTheadTfoot());
1772 appendToCurrentNodeAndPushElement(
1773 ElementName.TR,
1774 HtmlAttributes.EMPTY_ATTRIBUTES);
1775 mode = IN_ROW;
1776 continue;
1777 case CAPTION:
1778 case COL:
1779 case COLGROUP:
1780 case TBODY_OR_THEAD_OR_TFOOT:
1781 eltPos = findLastInTableScopeOrRootTemplateTbodyTheadTfoot();
1782 if (eltPos == 0 || stack[eltPos].getGroup() == TEMPLATE) {
1783 assert fragment || isTemplateContents();
1784 errStrayStartTag(name);
1785 break starttagloop;
1786 } else {
1787 clearStackBackTo(eltPos);
1788 pop();
1789 mode = IN_TABLE;
1790 continue;
1791 }
1792 default:
1793 // fall through to IN_TABLE
1794 }
1795 case IN_TABLE:
1796 intableloop: for (;;) {
1797 switch (group) {
1798 case CAPTION:
1799 clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
1800 insertMarker();
1801 appendToCurrentNodeAndPushElement(
1802 elementName,
1803 attributes);
1804 mode = IN_CAPTION;
1805 attributes = null; // CPP
1806 break starttagloop;
1807 case COLGROUP:
1808 clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
1809 appendToCurrentNodeAndPushElement(
1810 elementName,
1811 attributes);
1812 mode = IN_COLUMN_GROUP;
1813 attributes = null; // CPP
1814 break starttagloop;
1815 case COL:
1816 clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
1817 appendToCurrentNodeAndPushElement(
1818 ElementName.COLGROUP,
1819 HtmlAttributes.EMPTY_ATTRIBUTES);
1820 mode = IN_COLUMN_GROUP;
1821 continue starttagloop;
1822 case TBODY_OR_THEAD_OR_TFOOT:
1823 clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
1824 appendToCurrentNodeAndPushElement(
1825 elementName,
1826 attributes);
1827 mode = IN_TABLE_BODY;
1828 attributes = null; // CPP
1829 break starttagloop;
1830 case TR:
1831 case TD_OR_TH:
1832 clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE));
1833 appendToCurrentNodeAndPushElement(
1834 ElementName.TBODY,
1835 HtmlAttributes.EMPTY_ATTRIBUTES);
1836 mode = IN_TABLE_BODY;
1837 continue starttagloop;
1838 case TEMPLATE:
1839 // fall through to IN_HEAD
1840 break intableloop;
1841 case TABLE:
1842 errTableSeenWhileTableOpen();
1843 eltPos = findLastInTableScope(name);
1844 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
1845 assert fragment || isTemplateContents();
1846 break starttagloop;
1847 }
1848 generateImpliedEndTags();
1849 // XXX is the next if dead code?
1850 if (errorHandler != null && !isCurrent("table")) {
1851 errNoCheckUnclosedElementsOnStack();
1852 }
1853 while (currentPtr >= eltPos) {
1854 pop();
1855 }
1856 resetTheInsertionMode();
1857 continue starttagloop;
1858 case SCRIPT:
1859 // XXX need to manage much more stuff
1860 // here if
1861 // supporting
1862 // document.write()
1863 appendToCurrentNodeAndPushElement(
1864 elementName,
1865 attributes);
1866 originalMode = mode;
1867 mode = TEXT;
1868 tokenizer.setStateAndEndTagExpectation(
1869 Tokenizer.SCRIPT_DATA, elementName);
1870 attributes = null; // CPP
1871 break starttagloop;
1872 case STYLE:
1873 appendToCurrentNodeAndPushElement(
1874 elementName,
1875 attributes);
1876 originalMode = mode;
1877 mode = TEXT;
1878 tokenizer.setStateAndEndTagExpectation(
1879 Tokenizer.RAWTEXT, elementName);
1880 attributes = null; // CPP
1881 break starttagloop;
1882 case INPUT:
1883 errStartTagInTable(name);
1884 if (!Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
1885 "hidden",
1886 attributes.getValue(AttributeName.TYPE))) {
1887 break intableloop;
1888 }
1889 appendVoidElementToCurrent(
1890 name, attributes,
1891 formPointer);
1892 selfClosing = false;
1893 attributes = null; // CPP
1894 break starttagloop;
1895 case FORM:
1896 if (formPointer != null || isTemplateContents()) {
1897 errFormWhenFormOpen();
1898 break starttagloop;
1899 } else {
1900 errStartTagInTable(name);
1901 appendVoidFormToCurrent(attributes);
1902 attributes = null; // CPP
1903 break starttagloop;
1904 }
1905 default:
1906 errStartTagInTable(name);
1907 // fall through to IN_BODY
1908 break intableloop;
1909 }
1910 }
1911 case IN_CAPTION:
1912 switch (group) {
1913 case CAPTION:
1914 case COL:
1915 case COLGROUP:
1916 case TBODY_OR_THEAD_OR_TFOOT:
1917 case TR:
1918 case TD_OR_TH:
1919 errStrayStartTag(name);
1920 eltPos = findLastInTableScope("caption");
1921 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
1922 break starttagloop;
1923 }
1924 generateImpliedEndTags();
1925 if (errorHandler != null && currentPtr != eltPos) {
1926 errNoCheckUnclosedElementsOnStack();
1927 }
1928 while (currentPtr >= eltPos) {
1929 pop();
1930 }
1931 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
1932 mode = IN_TABLE;
1933 continue;
1934 default:
1935 // fall through to IN_BODY
1936 }
1937 case IN_CELL:
1938 switch (group) {
1939 case CAPTION:
1940 case COL:
1941 case COLGROUP:
1942 case TBODY_OR_THEAD_OR_TFOOT:
1943 case TR:
1944 case TD_OR_TH:
1945 eltPos = findLastInTableScopeTdTh();
1946 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
1947 errNoCellToClose();
1948 break starttagloop;
1949 } else {
1950 closeTheCell(eltPos);
1951 continue;
1952 }
1953 default:
1954 // fall through to IN_BODY
1955 }
1956 case FRAMESET_OK:
1957 switch (group) {
1958 case FRAMESET:
1959 if (mode == FRAMESET_OK) {
1960 if (currentPtr == 0 || stack[1].getGroup() != BODY) {
1961 assert fragment || isTemplateContents();
1962 errStrayStartTag(name);
1963 break starttagloop;
1964 } else {
1965 errFramesetStart();
1966 detachFromParent(stack[1].node);
1967 while (currentPtr > 0) {
1968 pop();
1969 }
1970 appendToCurrentNodeAndPushElement(
1971 elementName,
1972 attributes);
1973 mode = IN_FRAMESET;
1974 attributes = null; // CPP
1975 break starttagloop;
1976 }
1977 } else {
1978 errStrayStartTag(name);
1979 break starttagloop;
1980 }
1981 // NOT falling through!
1982 case PRE_OR_LISTING:
1983 case LI:
1984 case DD_OR_DT:
1985 case BUTTON:
1986 case MARQUEE_OR_APPLET:
1987 case OBJECT:
1988 case TABLE:
1989 case AREA_OR_WBR:
1990 case BR:
1991 case EMBED:
1992 case IMG:
1993 case INPUT:
1994 case KEYGEN:
1995 case HR:
1996 case TEXTAREA:
1997 case XMP:
1998 case IFRAME:
1999 case SELECT:
2000 if (mode == FRAMESET_OK
2001 && !(group == INPUT && Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
2002 "hidden",
2003 attributes.getValue(AttributeName.TYPE)))) {
2004 framesetOk = false;
2005 mode = IN_BODY;
2006 }
2007 // fall through to IN_BODY
2008 default:
2009 // fall through to IN_BODY
2010 }
2011 case IN_BODY:
2012 inbodyloop: for (;;) {
2013 switch (group) {
2014 case HTML:
2015 errStrayStartTag(name);
2016 if (!fragment && !isTemplateContents()) {
2017 addAttributesToHtml(attributes);
2018 attributes = null; // CPP
2019 }
2020 break starttagloop;
2021 case BASE:
2022 case LINK_OR_BASEFONT_OR_BGSOUND:
2023 case META:
2024 case STYLE:
2025 case SCRIPT:
2026 case TITLE:
2027 case TEMPLATE:
2028 // Fall through to IN_HEAD
2029 break inbodyloop;
2030 case BODY:
2031 if (currentPtr == 0 || stack[1].getGroup() != BODY || isTemplateContents()) {
2032 assert fragment || isTemplateContents();
2033 errStrayStartTag(name);
2034 break starttagloop;
2035 }
2036 errFooSeenWhenFooOpen(name);
2037 framesetOk = false;
2038 if (mode == FRAMESET_OK) {
2039 mode = IN_BODY;
2040 }
2041 if (addAttributesToBody(attributes)) {
2042 attributes = null; // CPP
2043 }
2044 break starttagloop;
2045 case P:
2046 case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
2047 case UL_OR_OL_OR_DL:
2048 case ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY:
2049 implicitlyCloseP();
2050 appendToCurrentNodeAndPushElementMayFoster(
2051 elementName,
2052 attributes);
2053 attributes = null; // CPP
2054 break starttagloop;
2055 case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
2056 implicitlyCloseP();
2057 if (stack[currentPtr].getGroup() == H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) {
2058 errHeadingWhenHeadingOpen();
2059 pop();
2060 }
2061 appendToCurrentNodeAndPushElementMayFoster(
2062 elementName,
2063 attributes);
2064 attributes = null; // CPP
2065 break starttagloop;
2066 case FIELDSET:
2067 implicitlyCloseP();
2068 appendToCurrentNodeAndPushElementMayFoster(
2069 elementName,
2070 attributes, formPointer);
2071 attributes = null; // CPP
2072 break starttagloop;
2073 case PRE_OR_LISTING:
2074 implicitlyCloseP();
2075 appendToCurrentNodeAndPushElementMayFoster(
2076 elementName,
2077 attributes);
2078 needToDropLF = true;
2079 attributes = null; // CPP
2080 break starttagloop;
2081 case FORM:
2082 if (formPointer != null && !isTemplateContents()) {
2083 errFormWhenFormOpen();
2084 break starttagloop;
2085 } else {
2086 implicitlyCloseP();
2087 appendToCurrentNodeAndPushFormElementMayFoster(attributes);
2088 attributes = null; // CPP
2089 break starttagloop;
2090 }
2091 case LI:
2092 case DD_OR_DT:
2093 eltPos = currentPtr;
2094 for (;;) {
2095 StackNode<T> node = stack[eltPos]; // weak
2096 // ref
2097 if (node.getGroup() == group) { // LI or
2098 // DD_OR_DT
2099 generateImpliedEndTagsExceptFor(node.name);
2100 if (errorHandler != null
2101 && eltPos != currentPtr) {
2102 errUnclosedElementsImplied(eltPos, name);
2103 }
2104 while (currentPtr >= eltPos) {
2105 pop();
2106 }
2107 break;
2108 } else if (node.isSpecial()
2109 && (node.ns != "http://www.w3.org/1999/xhtml"
2110 || (node.name != "p"
2111 && node.name != "address"
2112 && node.name != "div"))) {
2113 break;
2114 }
2115 eltPos--;
2116 }
2117 implicitlyCloseP();
2118 appendToCurrentNodeAndPushElementMayFoster(
2119 elementName,
2120 attributes);
2121 attributes = null; // CPP
2122 break starttagloop;
2123 case PLAINTEXT:
2124 implicitlyCloseP();
2125 appendToCurrentNodeAndPushElementMayFoster(
2126 elementName,
2127 attributes);
2128 tokenizer.setStateAndEndTagExpectation(
2129 Tokenizer.PLAINTEXT, elementName);
2130 attributes = null; // CPP
2131 break starttagloop;
2132 case A:
2133 int activeAPos = findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker("a");
2134 if (activeAPos != -1) {
2135 errFooSeenWhenFooOpen(name);
2136 StackNode<T> activeA = listOfActiveFormattingElements[activeAPos];
2137 activeA.retain();
2138 adoptionAgencyEndTag("a");
2139 removeFromStack(activeA);
2140 activeAPos = findInListOfActiveFormattingElements(activeA);
2141 if (activeAPos != -1) {
2142 removeFromListOfActiveFormattingElements(activeAPos);
2143 }
2144 activeA.release();
2145 }
2146 reconstructTheActiveFormattingElements();
2147 appendToCurrentNodeAndPushFormattingElementMayFoster(
2148 elementName,
2149 attributes);
2150 attributes = null; // CPP
2151 break starttagloop;
2152 case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
2153 case FONT:
2154 reconstructTheActiveFormattingElements();
2155 maybeForgetEarlierDuplicateFormattingElement(elementName.name, attributes);
2156 appendToCurrentNodeAndPushFormattingElementMayFoster(
2157 elementName,
2158 attributes);
2159 attributes = null; // CPP
2160 break starttagloop;
2161 case NOBR:
2162 reconstructTheActiveFormattingElements();
2163 if (TreeBuilder.NOT_FOUND_ON_STACK != findLastInScope("nobr")) {
2164 errFooSeenWhenFooOpen(name);
2165 adoptionAgencyEndTag("nobr");
2166 reconstructTheActiveFormattingElements();
2167 }
2168 appendToCurrentNodeAndPushFormattingElementMayFoster(
2169 elementName,
2170 attributes);
2171 attributes = null; // CPP
2172 break starttagloop;
2173 case BUTTON:
2174 eltPos = findLastInScope(name);
2175 if (eltPos != TreeBuilder.NOT_FOUND_ON_STACK) {
2176 errFooSeenWhenFooOpen(name);
2177 generateImpliedEndTags();
2178 if (errorHandler != null
2179 && !isCurrent(name)) {
2180 errUnclosedElementsImplied(eltPos, name);
2181 }
2182 while (currentPtr >= eltPos) {
2183 pop();
2184 }
2185 continue starttagloop;
2186 } else {
2187 reconstructTheActiveFormattingElements();
2188 appendToCurrentNodeAndPushElementMayFoster(
2189 elementName,
2190 attributes, formPointer);
2191 attributes = null; // CPP
2192 break starttagloop;
2193 }
2194 case OBJECT:
2195 reconstructTheActiveFormattingElements();
2196 appendToCurrentNodeAndPushElementMayFoster(
2197 elementName,
2198 attributes, formPointer);
2199 insertMarker();
2200 attributes = null; // CPP
2201 break starttagloop;
2202 case MARQUEE_OR_APPLET:
2203 reconstructTheActiveFormattingElements();
2204 appendToCurrentNodeAndPushElementMayFoster(
2205 elementName,
2206 attributes);
2207 insertMarker();
2208 attributes = null; // CPP
2209 break starttagloop;
2210 case TABLE:
2211 // The only quirk. Blame Hixie and
2212 // Acid2.
2213 if (!quirks) {
2214 implicitlyCloseP();
2215 }
2216 appendToCurrentNodeAndPushElementMayFoster(
2217 elementName,
2218 attributes);
2219 mode = IN_TABLE;
2220 attributes = null; // CPP
2221 break starttagloop;
2222 case BR:
2223 case EMBED:
2224 case AREA_OR_WBR:
2225 reconstructTheActiveFormattingElements();
2226 // FALL THROUGH to PARAM_OR_SOURCE_OR_TRACK
2227 // CPPONLY: case MENUITEM:
2228 case PARAM_OR_SOURCE_OR_TRACK:
2229 appendVoidElementToCurrentMayFoster(
2230 elementName,
2231 attributes);
2232 selfClosing = false;
2233 attributes = null; // CPP
2234 break starttagloop;
2235 case HR:
2236 implicitlyCloseP();
2237 appendVoidElementToCurrentMayFoster(
2238 elementName,
2239 attributes);
2240 selfClosing = false;
2241 attributes = null; // CPP
2242 break starttagloop;
2243 case IMAGE:
2244 errImage();
2245 elementName = ElementName.IMG;
2246 continue starttagloop;
2247 case IMG:
2248 case KEYGEN:
2249 case INPUT:
2250 reconstructTheActiveFormattingElements();
2251 appendVoidElementToCurrentMayFoster(
2252 name, attributes,
2253 formPointer);
2254 selfClosing = false;
2255 attributes = null; // CPP
2256 break starttagloop;
2257 case ISINDEX:
2258 errIsindex();
2259 if (formPointer != null && !isTemplateContents()) {
2260 break starttagloop;
2261 }
2262 implicitlyCloseP();
2263 HtmlAttributes formAttrs = new HtmlAttributes(0);
2264 int actionIndex = attributes.getIndex(AttributeName.ACTION);
2265 if (actionIndex > -1) {
2266 formAttrs.addAttribute(
2267 AttributeName.ACTION,
2268 attributes.getValueNoBoundsCheck(actionIndex)
2269 // [NOCPP[
2270 , XmlViolationPolicy.ALLOW
2271 // ]NOCPP]
2272 );
2273 }
2274 appendToCurrentNodeAndPushFormElementMayFoster(formAttrs);
2275 appendVoidElementToCurrentMayFoster(
2276 ElementName.HR,
2277 HtmlAttributes.EMPTY_ATTRIBUTES);
2278 appendToCurrentNodeAndPushElementMayFoster(
2279 ElementName.LABEL,
2280 HtmlAttributes.EMPTY_ATTRIBUTES);
2281 int promptIndex = attributes.getIndex(AttributeName.PROMPT);
2282 if (promptIndex > -1) {
2283 @Auto char[] prompt = Portability.newCharArrayFromString(attributes.getValueNoBoundsCheck(promptIndex));
2284 appendCharacters(stack[currentPtr].node,
2285 prompt, 0, prompt.length);
2286 } else {
2287 appendIsindexPrompt(stack[currentPtr].node);
2288 }
2289 HtmlAttributes inputAttributes = new HtmlAttributes(
2290 0);
2291 inputAttributes.addAttribute(
2292 AttributeName.NAME,
2293 Portability.newStringFromLiteral("isindex")
2294 // [NOCPP[
2295 , XmlViolationPolicy.ALLOW
2296 // ]NOCPP]
2297 );
2298 for (int i = 0; i < attributes.getLength(); i++) {
2299 AttributeName attributeQName = attributes.getAttributeNameNoBoundsCheck(i);
2300 if (AttributeName.NAME == attributeQName
2301 || AttributeName.PROMPT == attributeQName) {
2302 attributes.releaseValue(i);
2303 } else if (AttributeName.ACTION != attributeQName) {
2304 inputAttributes.addAttribute(
2305 attributeQName,
2306 attributes.getValueNoBoundsCheck(i)
2307 // [NOCPP[
2308 , XmlViolationPolicy.ALLOW
2309 // ]NOCPP]
2311 );
2312 }
2313 }
2314 attributes.clearWithoutReleasingContents();
2315 appendVoidElementToCurrentMayFoster(
2316 "input",
2317 inputAttributes, formPointer);
2318 pop(); // label
2319 appendVoidElementToCurrentMayFoster(
2320 ElementName.HR,
2321 HtmlAttributes.EMPTY_ATTRIBUTES);
2322 pop(); // form
2324 if (!isTemplateContents()) {
2325 formPointer = null;
2326 }
2328 selfClosing = false;
2329 // Portability.delete(formAttrs);
2330 // Portability.delete(inputAttributes);
2331 // Don't delete attributes, they are deleted
2332 // later
2333 break starttagloop;
2334 case TEXTAREA:
2335 appendToCurrentNodeAndPushElementMayFoster(
2336 elementName,
2337 attributes, formPointer);
2338 tokenizer.setStateAndEndTagExpectation(
2339 Tokenizer.RCDATA, elementName);
2340 originalMode = mode;
2341 mode = TEXT;
2342 needToDropLF = true;
2343 attributes = null; // CPP
2344 break starttagloop;
2345 case XMP:
2346 implicitlyCloseP();
2347 reconstructTheActiveFormattingElements();
2348 appendToCurrentNodeAndPushElementMayFoster(
2349 elementName,
2350 attributes);
2351 originalMode = mode;
2352 mode = TEXT;
2353 tokenizer.setStateAndEndTagExpectation(
2354 Tokenizer.RAWTEXT, elementName);
2355 attributes = null; // CPP
2356 break starttagloop;
2357 case NOSCRIPT:
2358 if (!scriptingEnabled) {
2359 reconstructTheActiveFormattingElements();
2360 appendToCurrentNodeAndPushElementMayFoster(
2361 elementName,
2362 attributes);
2363 attributes = null; // CPP
2364 break starttagloop;
2365 } else {
2366 // fall through
2367 }
2368 case NOFRAMES:
2369 case IFRAME:
2370 case NOEMBED:
2371 startTagGenericRawText(elementName, attributes);
2372 attributes = null; // CPP
2373 break starttagloop;
2374 case SELECT:
2375 reconstructTheActiveFormattingElements();
2376 appendToCurrentNodeAndPushElementMayFoster(
2377 elementName,
2378 attributes, formPointer);
2379 switch (mode) {
2380 case IN_TABLE:
2381 case IN_CAPTION:
2382 case IN_COLUMN_GROUP:
2383 case IN_TABLE_BODY:
2384 case IN_ROW:
2385 case IN_CELL:
2386 mode = IN_SELECT_IN_TABLE;
2387 break;
2388 default:
2389 mode = IN_SELECT;
2390 break;
2391 }
2392 attributes = null; // CPP
2393 break starttagloop;
2394 case OPTGROUP:
2395 case OPTION:
2396 if (isCurrent("option")) {
2397 pop();
2398 }
2399 reconstructTheActiveFormattingElements();
2400 appendToCurrentNodeAndPushElementMayFoster(
2401 elementName,
2402 attributes);
2403 attributes = null; // CPP
2404 break starttagloop;
2405 case RT_OR_RP:
2406 eltPos = findLastInScope("ruby");
2407 if (eltPos != NOT_FOUND_ON_STACK) {
2408 generateImpliedEndTags();
2409 }
2410 if (eltPos != currentPtr) {
2411 if (eltPos != NOT_FOUND_ON_STACK) {
2412 errStartTagSeenWithoutRuby(name);
2413 } else {
2414 errUnclosedChildrenInRuby();
2415 }
2416 }
2417 appendToCurrentNodeAndPushElementMayFoster(
2418 elementName,
2419 attributes);
2420 attributes = null; // CPP
2421 break starttagloop;
2422 case MATH:
2423 reconstructTheActiveFormattingElements();
2424 attributes.adjustForMath();
2425 if (selfClosing) {
2426 appendVoidElementToCurrentMayFosterMathML(
2427 elementName, attributes);
2428 selfClosing = false;
2429 } else {
2430 appendToCurrentNodeAndPushElementMayFosterMathML(
2431 elementName, attributes);
2432 }
2433 attributes = null; // CPP
2434 break starttagloop;
2435 case SVG:
2436 reconstructTheActiveFormattingElements();
2437 attributes.adjustForSvg();
2438 if (selfClosing) {
2439 appendVoidElementToCurrentMayFosterSVG(
2440 elementName,
2441 attributes);
2442 selfClosing = false;
2443 } else {
2444 appendToCurrentNodeAndPushElementMayFosterSVG(
2445 elementName, attributes);
2446 }
2447 attributes = null; // CPP
2448 break starttagloop;
2449 case CAPTION:
2450 case COL:
2451 case COLGROUP:
2452 case TBODY_OR_THEAD_OR_TFOOT:
2453 case TR:
2454 case TD_OR_TH:
2455 case FRAME:
2456 case FRAMESET:
2457 case HEAD:
2458 errStrayStartTag(name);
2459 break starttagloop;
2460 case OUTPUT_OR_LABEL:
2461 reconstructTheActiveFormattingElements();
2462 appendToCurrentNodeAndPushElementMayFoster(
2463 elementName,
2464 attributes, formPointer);
2465 attributes = null; // CPP
2466 break starttagloop;
2467 default:
2468 reconstructTheActiveFormattingElements();
2469 appendToCurrentNodeAndPushElementMayFoster(
2470 elementName,
2471 attributes);
2472 attributes = null; // CPP
2473 break starttagloop;
2474 }
2475 }
2476 case IN_HEAD:
2477 inheadloop: for (;;) {
2478 switch (group) {
2479 case HTML:
2480 errStrayStartTag(name);
2481 if (!fragment && !isTemplateContents()) {
2482 addAttributesToHtml(attributes);
2483 attributes = null; // CPP
2484 }
2485 break starttagloop;
2486 case BASE:
2487 case LINK_OR_BASEFONT_OR_BGSOUND:
2488 appendVoidElementToCurrentMayFoster(
2489 elementName,
2490 attributes);
2491 selfClosing = false;
2492 attributes = null; // CPP
2493 break starttagloop;
2494 case META:
2495 // Fall through to IN_HEAD_NOSCRIPT
2496 break inheadloop;
2497 case TITLE:
2498 startTagTitleInHead(elementName, attributes);
2499 attributes = null; // CPP
2500 break starttagloop;
2501 case NOSCRIPT:
2502 if (scriptingEnabled) {
2503 appendToCurrentNodeAndPushElement(
2504 elementName,
2505 attributes);
2506 originalMode = mode;
2507 mode = TEXT;
2508 tokenizer.setStateAndEndTagExpectation(
2509 Tokenizer.RAWTEXT, elementName);
2510 } else {
2511 appendToCurrentNodeAndPushElementMayFoster(
2512 elementName,
2513 attributes);
2514 mode = IN_HEAD_NOSCRIPT;
2515 }
2516 attributes = null; // CPP
2517 break starttagloop;
2518 case SCRIPT:
2519 startTagScriptInHead(elementName, attributes);
2520 attributes = null; // CPP
2521 break starttagloop;
2522 case STYLE:
2523 case NOFRAMES:
2524 startTagGenericRawText(elementName, attributes);
2525 attributes = null; // CPP
2526 break starttagloop;
2527 case HEAD:
2528 /* Parse error. */
2529 errFooSeenWhenFooOpen(name);
2530 /* Ignore the token. */
2531 break starttagloop;
2532 case TEMPLATE:
2533 startTagTemplateInHead(elementName, attributes);
2534 attributes = null; // CPP
2535 break starttagloop;
2536 default:
2537 pop();
2538 mode = AFTER_HEAD;
2539 continue starttagloop;
2540 }
2541 }
2542 case IN_HEAD_NOSCRIPT:
2543 switch (group) {
2544 case HTML:
2545 // XXX did Hixie really mean to omit "base"
2546 // here?
2547 errStrayStartTag(name);
2548 if (!fragment && !isTemplateContents()) {
2549 addAttributesToHtml(attributes);
2550 attributes = null; // CPP
2551 }
2552 break starttagloop;
2553 case LINK_OR_BASEFONT_OR_BGSOUND:
2554 appendVoidElementToCurrentMayFoster(
2555 elementName,
2556 attributes);
2557 selfClosing = false;
2558 attributes = null; // CPP
2559 break starttagloop;
2560 case META:
2561 checkMetaCharset(attributes);
2562 appendVoidElementToCurrentMayFoster(
2563 elementName,
2564 attributes);
2565 selfClosing = false;
2566 attributes = null; // CPP
2567 break starttagloop;
2568 case STYLE:
2569 case NOFRAMES:
2570 appendToCurrentNodeAndPushElement(
2571 elementName,
2572 attributes);
2573 originalMode = mode;
2574 mode = TEXT;
2575 tokenizer.setStateAndEndTagExpectation(
2576 Tokenizer.RAWTEXT, elementName);
2577 attributes = null; // CPP
2578 break starttagloop;
2579 case HEAD:
2580 errFooSeenWhenFooOpen(name);
2581 break starttagloop;
2582 case NOSCRIPT:
2583 errFooSeenWhenFooOpen(name);
2584 break starttagloop;
2585 default:
2586 errBadStartTagInHead(name);
2587 pop();
2588 mode = IN_HEAD;
2589 continue;
2590 }
2591 case IN_COLUMN_GROUP:
2592 switch (group) {
2593 case HTML:
2594 errStrayStartTag(name);
2595 if (!fragment && !isTemplateContents()) {
2596 addAttributesToHtml(attributes);
2597 attributes = null; // CPP
2598 }
2599 break starttagloop;
2600 case COL:
2601 appendVoidElementToCurrentMayFoster(
2602 elementName,
2603 attributes);
2604 selfClosing = false;
2605 attributes = null; // CPP
2606 break starttagloop;
2607 case TEMPLATE:
2608 startTagTemplateInHead(elementName, attributes);
2609 attributes = null; // CPP
2610 break starttagloop;
2611 default:
2612 if (currentPtr == 0 || stack[currentPtr].getGroup() == TEMPLATE) {
2613 assert fragment || isTemplateContents();
2614 errGarbageInColgroup();
2615 break starttagloop;
2616 }
2617 pop();
2618 mode = IN_TABLE;
2619 continue;
2620 }
2621 case IN_SELECT_IN_TABLE:
2622 switch (group) {
2623 case CAPTION:
2624 case TBODY_OR_THEAD_OR_TFOOT:
2625 case TR:
2626 case TD_OR_TH:
2627 case TABLE:
2628 errStartTagWithSelectOpen(name);
2629 eltPos = findLastInTableScope("select");
2630 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
2631 assert fragment;
2632 break starttagloop; // http://www.w3.org/Bugs/Public/show_bug.cgi?id=8375
2633 }
2634 while (currentPtr >= eltPos) {
2635 pop();
2636 }
2637 resetTheInsertionMode();
2638 continue;
2639 default:
2640 // fall through to IN_SELECT
2641 }
2642 case IN_SELECT:
2643 switch (group) {
2644 case HTML:
2645 errStrayStartTag(name);
2646 if (!fragment) {
2647 addAttributesToHtml(attributes);
2648 attributes = null; // CPP
2649 }
2650 break starttagloop;
2651 case OPTION:
2652 if (isCurrent("option")) {
2653 pop();
2654 }
2655 appendToCurrentNodeAndPushElement(
2656 elementName,
2657 attributes);
2658 attributes = null; // CPP
2659 break starttagloop;
2660 case OPTGROUP:
2661 if (isCurrent("option")) {
2662 pop();
2663 }
2664 if (isCurrent("optgroup")) {
2665 pop();
2666 }
2667 appendToCurrentNodeAndPushElement(
2668 elementName,
2669 attributes);
2670 attributes = null; // CPP
2671 break starttagloop;
2672 case SELECT:
2673 errStartSelectWhereEndSelectExpected();
2674 eltPos = findLastInTableScope(name);
2675 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
2676 assert fragment;
2677 errNoSelectInTableScope();
2678 break starttagloop;
2679 } else {
2680 while (currentPtr >= eltPos) {
2681 pop();
2682 }
2683 resetTheInsertionMode();
2684 break starttagloop;
2685 }
2686 case INPUT:
2687 case TEXTAREA:
2688 case KEYGEN:
2689 errStartTagWithSelectOpen(name);
2690 eltPos = findLastInTableScope("select");
2691 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
2692 assert fragment;
2693 break starttagloop;
2694 }
2695 while (currentPtr >= eltPos) {
2696 pop();
2697 }
2698 resetTheInsertionMode();
2699 continue;
2700 case SCRIPT:
2701 startTagScriptInHead(elementName, attributes);
2702 attributes = null; // CPP
2703 break starttagloop;
2704 case TEMPLATE:
2705 startTagTemplateInHead(elementName, attributes);
2706 attributes = null; // CPP
2707 break starttagloop;
2708 default:
2709 errStrayStartTag(name);
2710 break starttagloop;
2711 }
2712 case AFTER_BODY:
2713 switch (group) {
2714 case HTML:
2715 errStrayStartTag(name);
2716 if (!fragment && !isTemplateContents()) {
2717 addAttributesToHtml(attributes);
2718 attributes = null; // CPP
2719 }
2720 break starttagloop;
2721 default:
2722 errStrayStartTag(name);
2723 mode = framesetOk ? FRAMESET_OK : IN_BODY;
2724 continue;
2725 }
2726 case IN_FRAMESET:
2727 switch (group) {
2728 case FRAMESET:
2729 appendToCurrentNodeAndPushElement(
2730 elementName,
2731 attributes);
2732 attributes = null; // CPP
2733 break starttagloop;
2734 case FRAME:
2735 appendVoidElementToCurrentMayFoster(
2736 elementName,
2737 attributes);
2738 selfClosing = false;
2739 attributes = null; // CPP
2740 break starttagloop;
2741 default:
2742 // fall through to AFTER_FRAMESET
2743 }
2744 case AFTER_FRAMESET:
2745 switch (group) {
2746 case HTML:
2747 errStrayStartTag(name);
2748 if (!fragment && !isTemplateContents()) {
2749 addAttributesToHtml(attributes);
2750 attributes = null; // CPP
2751 }
2752 break starttagloop;
2753 case NOFRAMES:
2754 appendToCurrentNodeAndPushElement(
2755 elementName,
2756 attributes);
2757 originalMode = mode;
2758 mode = TEXT;
2759 tokenizer.setStateAndEndTagExpectation(
2760 Tokenizer.RAWTEXT, elementName);
2761 attributes = null; // CPP
2762 break starttagloop;
2763 default:
2764 errStrayStartTag(name);
2765 break starttagloop;
2766 }
2767 case INITIAL:
2768 /*
2769 * Parse error.
2770 */
2771 // [NOCPP[
2772 switch (doctypeExpectation) {
2773 case AUTO:
2774 err("Start tag seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
2775 break;
2776 case HTML:
2777 // ]NOCPP]
2778 errStartTagWithoutDoctype();
2779 // [NOCPP[
2780 break;
2781 case HTML401_STRICT:
2782 err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
2783 break;
2784 case HTML401_TRANSITIONAL:
2785 err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
2786 break;
2787 case NO_DOCTYPE_ERRORS:
2788 }
2789 // ]NOCPP]
2790 /*
2791 *
2792 * Set the document to quirks mode.
2793 */
2794 documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
2795 false);
2796 /*
2797 * Then, switch to the root element mode of the tree
2798 * construction stage
2799 */
2800 mode = BEFORE_HTML;
2801 /*
2802 * and reprocess the current token.
2803 */
2804 continue;
2805 case BEFORE_HTML:
2806 switch (group) {
2807 case HTML:
2808 // optimize error check and streaming SAX by
2809 // hoisting
2810 // "html" handling here.
2811 if (attributes == HtmlAttributes.EMPTY_ATTRIBUTES) {
2812 // This has the right magic side effect
2813 // that
2814 // it
2815 // makes attributes in SAX Tree mutable.
2816 appendHtmlElementToDocumentAndPush();
2817 } else {
2818 appendHtmlElementToDocumentAndPush(attributes);
2819 }
2820 // XXX application cache should fire here
2821 mode = BEFORE_HEAD;
2822 attributes = null; // CPP
2823 break starttagloop;
2824 default:
2825 /*
2826 * Create an HTMLElement node with the tag name
2827 * html, in the HTML namespace. Append it to the
2828 * Document object.
2829 */
2830 appendHtmlElementToDocumentAndPush();
2831 /* Switch to the main mode */
2832 mode = BEFORE_HEAD;
2833 /*
2834 * reprocess the current token.
2835 */
2836 continue;
2837 }
2838 case BEFORE_HEAD:
2839 switch (group) {
2840 case HTML:
2841 errStrayStartTag(name);
2842 if (!fragment && !isTemplateContents()) {
2843 addAttributesToHtml(attributes);
2844 attributes = null; // CPP
2845 }
2846 break starttagloop;
2847 case HEAD:
2848 /*
2849 * A start tag whose tag name is "head"
2850 *
2851 * Create an element for the token.
2852 *
2853 * Set the head element pointer to this new element
2854 * node.
2855 *
2856 * Append the new element to the current node and
2857 * push it onto the stack of open elements.
2858 */
2859 appendToCurrentNodeAndPushHeadElement(attributes);
2860 /*
2861 * Change the insertion mode to "in head".
2862 */
2863 mode = IN_HEAD;
2864 attributes = null; // CPP
2865 break starttagloop;
2866 default:
2867 /*
2868 * Any other start tag token
2869 *
2870 * Act as if a start tag token with the tag name
2871 * "head" and no attributes had been seen,
2872 */
2873 appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES);
2874 mode = IN_HEAD;
2875 /*
2876 * then reprocess the current token.
2877 *
2878 * This will result in an empty head element being
2879 * generated, with the current token being
2880 * reprocessed in the "after head" insertion mode.
2881 */
2882 continue;
2883 }
2884 case AFTER_HEAD:
2885 switch (group) {
2886 case HTML:
2887 errStrayStartTag(name);
2888 if (!fragment && !isTemplateContents()) {
2889 addAttributesToHtml(attributes);
2890 attributes = null; // CPP
2891 }
2892 break starttagloop;
2893 case BODY:
2894 if (attributes.getLength() == 0) {
2895 // This has the right magic side effect
2896 // that
2897 // it
2898 // makes attributes in SAX Tree mutable.
2899 appendToCurrentNodeAndPushBodyElement();
2900 } else {
2901 appendToCurrentNodeAndPushBodyElement(attributes);
2902 }
2903 framesetOk = false;
2904 mode = IN_BODY;
2905 attributes = null; // CPP
2906 break starttagloop;
2907 case FRAMESET:
2908 appendToCurrentNodeAndPushElement(
2909 elementName,
2910 attributes);
2911 mode = IN_FRAMESET;
2912 attributes = null; // CPP
2913 break starttagloop;
2914 case TEMPLATE:
2915 errFooBetweenHeadAndBody(name);
2916 pushHeadPointerOntoStack();
2917 StackNode<T> headOnStack = stack[currentPtr];
2918 startTagTemplateInHead(elementName, attributes);
2919 removeFromStack(headOnStack);
2920 attributes = null; // CPP
2921 break starttagloop;
2922 case BASE:
2923 case LINK_OR_BASEFONT_OR_BGSOUND:
2924 errFooBetweenHeadAndBody(name);
2925 pushHeadPointerOntoStack();
2926 appendVoidElementToCurrentMayFoster(
2927 elementName,
2928 attributes);
2929 selfClosing = false;
2930 pop(); // head
2931 attributes = null; // CPP
2932 break starttagloop;
2933 case META:
2934 errFooBetweenHeadAndBody(name);
2935 checkMetaCharset(attributes);
2936 pushHeadPointerOntoStack();
2937 appendVoidElementToCurrentMayFoster(
2938 elementName,
2939 attributes);
2940 selfClosing = false;
2941 pop(); // head
2942 attributes = null; // CPP
2943 break starttagloop;
2944 case SCRIPT:
2945 errFooBetweenHeadAndBody(name);
2946 pushHeadPointerOntoStack();
2947 appendToCurrentNodeAndPushElement(
2948 elementName,
2949 attributes);
2950 originalMode = mode;
2951 mode = TEXT;
2952 tokenizer.setStateAndEndTagExpectation(
2953 Tokenizer.SCRIPT_DATA, elementName);
2954 attributes = null; // CPP
2955 break starttagloop;
2956 case STYLE:
2957 case NOFRAMES:
2958 errFooBetweenHeadAndBody(name);
2959 pushHeadPointerOntoStack();
2960 appendToCurrentNodeAndPushElement(
2961 elementName,
2962 attributes);
2963 originalMode = mode;
2964 mode = TEXT;
2965 tokenizer.setStateAndEndTagExpectation(
2966 Tokenizer.RAWTEXT, elementName);
2967 attributes = null; // CPP
2968 break starttagloop;
2969 case TITLE:
2970 errFooBetweenHeadAndBody(name);
2971 pushHeadPointerOntoStack();
2972 appendToCurrentNodeAndPushElement(
2973 elementName,
2974 attributes);
2975 originalMode = mode;
2976 mode = TEXT;
2977 tokenizer.setStateAndEndTagExpectation(
2978 Tokenizer.RCDATA, elementName);
2979 attributes = null; // CPP
2980 break starttagloop;
2981 case HEAD:
2982 errStrayStartTag(name);
2983 break starttagloop;
2984 default:
2985 appendToCurrentNodeAndPushBodyElement();
2986 mode = FRAMESET_OK;
2987 continue;
2988 }
2989 case AFTER_AFTER_BODY:
2990 switch (group) {
2991 case HTML:
2992 errStrayStartTag(name);
2993 if (!fragment && !isTemplateContents()) {
2994 addAttributesToHtml(attributes);
2995 attributes = null; // CPP
2996 }
2997 break starttagloop;
2998 default:
2999 errStrayStartTag(name);
3000 fatal();
3001 mode = framesetOk ? FRAMESET_OK : IN_BODY;
3002 continue;
3003 }
3004 case AFTER_AFTER_FRAMESET:
3005 switch (group) {
3006 case HTML:
3007 errStrayStartTag(name);
3008 if (!fragment && !isTemplateContents()) {
3009 addAttributesToHtml(attributes);
3010 attributes = null; // CPP
3011 }
3012 break starttagloop;
3013 case NOFRAMES:
3014 startTagGenericRawText(elementName, attributes);
3015 attributes = null; // CPP
3016 break starttagloop;
3017 default:
3018 errStrayStartTag(name);
3019 break starttagloop;
3020 }
3021 case TEXT:
3022 assert false;
3023 break starttagloop; // Avoid infinite loop if the assertion
3024 // fails
3025 }
3026 }
3027 if (selfClosing) {
3028 errSelfClosing();
3029 }
3030 // CPPONLY: if (mBuilder == null && attributes != HtmlAttributes.EMPTY_ATTRIBUTES) {
3031 // CPPONLY: Portability.delete(attributes);
3032 // CPPONLY: }
3033 }
3035 private void startTagTitleInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException {
3036 appendToCurrentNodeAndPushElementMayFoster(elementName, attributes);
3037 originalMode = mode;
3038 mode = TEXT;
3039 tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, elementName);
3040 }
3042 private void startTagGenericRawText(ElementName elementName, HtmlAttributes attributes) throws SAXException {
3043 appendToCurrentNodeAndPushElementMayFoster(elementName, attributes);
3044 originalMode = mode;
3045 mode = TEXT;
3046 tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, elementName);
3047 }
3049 private void startTagScriptInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException {
3050 // XXX need to manage much more stuff here if supporting document.write()
3051 appendToCurrentNodeAndPushElementMayFoster(elementName, attributes);
3052 originalMode = mode;
3053 mode = TEXT;
3054 tokenizer.setStateAndEndTagExpectation(Tokenizer.SCRIPT_DATA, elementName);
3055 }
3057 private void startTagTemplateInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException {
3058 appendToCurrentNodeAndPushElement(elementName, attributes);
3059 insertMarker();
3060 framesetOk = false;
3061 originalMode = mode;
3062 mode = IN_TEMPLATE;
3063 pushTemplateMode(IN_TEMPLATE);
3064 }
3066 private boolean isTemplateContents() {
3067 return TreeBuilder.NOT_FOUND_ON_STACK != findLast("template");
3068 }
3070 private boolean isTemplateModeStackEmpty() {
3071 return templateModePtr == -1;
3072 }
3074 private boolean isSpecialParentInForeign(StackNode<T> stackNode) {
3075 @NsUri String ns = stackNode.ns;
3076 return ("http://www.w3.org/1999/xhtml" == ns)
3077 || (stackNode.isHtmlIntegrationPoint())
3078 || (("http://www.w3.org/1998/Math/MathML" == ns) && (stackNode.getGroup() == MI_MO_MN_MS_MTEXT));
3079 }
3081 /**
3082 *
3083 * <p>
3084 * C++ memory note: The return value must be released.
3085 *
3086 * @return
3087 * @throws SAXException
3088 * @throws StopSniffingException
3089 */
3090 public static String extractCharsetFromContent(String attributeValue) {
3091 // This is a bit ugly. Converting the string to char array in order to
3092 // make the portability layer smaller.
3093 int charsetState = CHARSET_INITIAL;
3094 int start = -1;
3095 int end = -1;
3096 @Auto char[] buffer = Portability.newCharArrayFromString(attributeValue);
3098 charsetloop: for (int i = 0; i < buffer.length; i++) {
3099 char c = buffer[i];
3100 switch (charsetState) {
3101 case CHARSET_INITIAL:
3102 switch (c) {
3103 case 'c':
3104 case 'C':
3105 charsetState = CHARSET_C;
3106 continue;
3107 default:
3108 continue;
3109 }
3110 case CHARSET_C:
3111 switch (c) {
3112 case 'h':
3113 case 'H':
3114 charsetState = CHARSET_H;
3115 continue;
3116 default:
3117 charsetState = CHARSET_INITIAL;
3118 continue;
3119 }
3120 case CHARSET_H:
3121 switch (c) {
3122 case 'a':
3123 case 'A':
3124 charsetState = CHARSET_A;
3125 continue;
3126 default:
3127 charsetState = CHARSET_INITIAL;
3128 continue;
3129 }
3130 case CHARSET_A:
3131 switch (c) {
3132 case 'r':
3133 case 'R':
3134 charsetState = CHARSET_R;
3135 continue;
3136 default:
3137 charsetState = CHARSET_INITIAL;
3138 continue;
3139 }
3140 case CHARSET_R:
3141 switch (c) {
3142 case 's':
3143 case 'S':
3144 charsetState = CHARSET_S;
3145 continue;
3146 default:
3147 charsetState = CHARSET_INITIAL;
3148 continue;
3149 }
3150 case CHARSET_S:
3151 switch (c) {
3152 case 'e':
3153 case 'E':
3154 charsetState = CHARSET_E;
3155 continue;
3156 default:
3157 charsetState = CHARSET_INITIAL;
3158 continue;
3159 }
3160 case CHARSET_E:
3161 switch (c) {
3162 case 't':
3163 case 'T':
3164 charsetState = CHARSET_T;
3165 continue;
3166 default:
3167 charsetState = CHARSET_INITIAL;
3168 continue;
3169 }
3170 case CHARSET_T:
3171 switch (c) {
3172 case '\t':
3173 case '\n':
3174 case '\u000C':
3175 case '\r':
3176 case ' ':
3177 continue;
3178 case '=':
3179 charsetState = CHARSET_EQUALS;
3180 continue;
3181 default:
3182 return null;
3183 }
3184 case CHARSET_EQUALS:
3185 switch (c) {
3186 case '\t':
3187 case '\n':
3188 case '\u000C':
3189 case '\r':
3190 case ' ':
3191 continue;
3192 case '\'':
3193 start = i + 1;
3194 charsetState = CHARSET_SINGLE_QUOTED;
3195 continue;
3196 case '\"':
3197 start = i + 1;
3198 charsetState = CHARSET_DOUBLE_QUOTED;
3199 continue;
3200 default:
3201 start = i;
3202 charsetState = CHARSET_UNQUOTED;
3203 continue;
3204 }
3205 case CHARSET_SINGLE_QUOTED:
3206 switch (c) {
3207 case '\'':
3208 end = i;
3209 break charsetloop;
3210 default:
3211 continue;
3212 }
3213 case CHARSET_DOUBLE_QUOTED:
3214 switch (c) {
3215 case '\"':
3216 end = i;
3217 break charsetloop;
3218 default:
3219 continue;
3220 }
3221 case CHARSET_UNQUOTED:
3222 switch (c) {
3223 case '\t':
3224 case '\n':
3225 case '\u000C':
3226 case '\r':
3227 case ' ':
3228 case ';':
3229 end = i;
3230 break charsetloop;
3231 default:
3232 continue;
3233 }
3234 }
3235 }
3236 String charset = null;
3237 if (start != -1) {
3238 if (end == -1) {
3239 end = buffer.length;
3240 }
3241 charset = Portability.newStringFromBuffer(buffer, start, end
3242 - start);
3243 }
3244 return charset;
3245 }
3247 private void checkMetaCharset(HtmlAttributes attributes)
3248 throws SAXException {
3249 String charset = attributes.getValue(AttributeName.CHARSET);
3250 if (charset != null) {
3251 if (tokenizer.internalEncodingDeclaration(charset)) {
3252 requestSuspension();
3253 return;
3254 }
3255 return;
3256 }
3257 if (!Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
3258 "content-type",
3259 attributes.getValue(AttributeName.HTTP_EQUIV))) {
3260 return;
3261 }
3262 String content = attributes.getValue(AttributeName.CONTENT);
3263 if (content != null) {
3264 String extract = TreeBuilder.extractCharsetFromContent(content);
3265 // remember not to return early without releasing the string
3266 if (extract != null) {
3267 if (tokenizer.internalEncodingDeclaration(extract)) {
3268 requestSuspension();
3269 }
3270 }
3271 Portability.releaseString(extract);
3272 }
3273 }
3275 public final void endTag(ElementName elementName) throws SAXException {
3276 flushCharacters();
3277 needToDropLF = false;
3278 int eltPos;
3279 int group = elementName.getGroup();
3280 @Local String name = elementName.name;
3281 endtagloop: for (;;) {
3282 if (isInForeign()) {
3283 if (stack[currentPtr].name != name) {
3284 errEndTagDidNotMatchCurrentOpenElement(name, stack[currentPtr].popName);
3285 }
3286 eltPos = currentPtr;
3287 for (;;) {
3288 if (stack[eltPos].name == name) {
3289 while (currentPtr >= eltPos) {
3290 pop();
3291 }
3292 break endtagloop;
3293 }
3294 if (stack[--eltPos].ns == "http://www.w3.org/1999/xhtml") {
3295 break;
3296 }
3297 }
3298 }
3299 switch (mode) {
3300 case IN_TEMPLATE:
3301 switch (group) {
3302 case TEMPLATE:
3303 // fall through to IN_HEAD
3304 break;
3305 default:
3306 errStrayEndTag(name);
3307 break endtagloop;
3308 }
3309 case IN_ROW:
3310 switch (group) {
3311 case TR:
3312 eltPos = findLastOrRoot(TreeBuilder.TR);
3313 if (eltPos == 0) {
3314 assert fragment || isTemplateContents();
3315 errNoTableRowToClose();
3316 break endtagloop;
3317 }
3318 clearStackBackTo(eltPos);
3319 pop();
3320 mode = IN_TABLE_BODY;
3321 break endtagloop;
3322 case TABLE:
3323 eltPos = findLastOrRoot(TreeBuilder.TR);
3324 if (eltPos == 0) {
3325 assert fragment || isTemplateContents();
3326 errNoTableRowToClose();
3327 break endtagloop;
3328 }
3329 clearStackBackTo(eltPos);
3330 pop();
3331 mode = IN_TABLE_BODY;
3332 continue;
3333 case TBODY_OR_THEAD_OR_TFOOT:
3334 if (findLastInTableScope(name) == TreeBuilder.NOT_FOUND_ON_STACK) {
3335 errStrayEndTag(name);
3336 break endtagloop;
3337 }
3338 eltPos = findLastOrRoot(TreeBuilder.TR);
3339 if (eltPos == 0) {
3340 assert fragment || isTemplateContents();
3341 errNoTableRowToClose();
3342 break endtagloop;
3343 }
3344 clearStackBackTo(eltPos);
3345 pop();
3346 mode = IN_TABLE_BODY;
3347 continue;
3348 case BODY:
3349 case CAPTION:
3350 case COL:
3351 case COLGROUP:
3352 case HTML:
3353 case TD_OR_TH:
3354 errStrayEndTag(name);
3355 break endtagloop;
3356 default:
3357 // fall through to IN_TABLE
3358 }
3359 case IN_TABLE_BODY:
3360 switch (group) {
3361 case TBODY_OR_THEAD_OR_TFOOT:
3362 eltPos = findLastOrRoot(name);
3363 if (eltPos == 0) {
3364 errStrayEndTag(name);
3365 break endtagloop;
3366 }
3367 clearStackBackTo(eltPos);
3368 pop();
3369 mode = IN_TABLE;
3370 break endtagloop;
3371 case TABLE:
3372 eltPos = findLastInTableScopeOrRootTemplateTbodyTheadTfoot();
3373 if (eltPos == 0 || stack[eltPos].getGroup() == TEMPLATE) {
3374 assert fragment || isTemplateContents();
3375 errStrayEndTag(name);
3376 break endtagloop;
3377 }
3378 clearStackBackTo(eltPos);
3379 pop();
3380 mode = IN_TABLE;
3381 continue;
3382 case BODY:
3383 case CAPTION:
3384 case COL:
3385 case COLGROUP:
3386 case HTML:
3387 case TD_OR_TH:
3388 case TR:
3389 errStrayEndTag(name);
3390 break endtagloop;
3391 default:
3392 // fall through to IN_TABLE
3393 }
3394 case IN_TABLE:
3395 switch (group) {
3396 case TABLE:
3397 eltPos = findLast("table");
3398 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3399 assert fragment || isTemplateContents();
3400 errStrayEndTag(name);
3401 break endtagloop;
3402 }
3403 while (currentPtr >= eltPos) {
3404 pop();
3405 }
3406 resetTheInsertionMode();
3407 break endtagloop;
3408 case BODY:
3409 case CAPTION:
3410 case COL:
3411 case COLGROUP:
3412 case HTML:
3413 case TBODY_OR_THEAD_OR_TFOOT:
3414 case TD_OR_TH:
3415 case TR:
3416 errStrayEndTag(name);
3417 break endtagloop;
3418 case TEMPLATE:
3419 // fall through to IN_HEAD
3420 break;
3421 default:
3422 errStrayEndTag(name);
3423 // fall through to IN_BODY
3424 }
3425 case IN_CAPTION:
3426 switch (group) {
3427 case CAPTION:
3428 eltPos = findLastInTableScope("caption");
3429 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3430 break endtagloop;
3431 }
3432 generateImpliedEndTags();
3433 if (errorHandler != null && currentPtr != eltPos) {
3434 errUnclosedElements(eltPos, name);
3435 }
3436 while (currentPtr >= eltPos) {
3437 pop();
3438 }
3439 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
3440 mode = IN_TABLE;
3441 break endtagloop;
3442 case TABLE:
3443 errTableClosedWhileCaptionOpen();
3444 eltPos = findLastInTableScope("caption");
3445 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3446 break endtagloop;
3447 }
3448 generateImpliedEndTags();
3449 if (errorHandler != null && currentPtr != eltPos) {
3450 errUnclosedElements(eltPos, name);
3451 }
3452 while (currentPtr >= eltPos) {
3453 pop();
3454 }
3455 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
3456 mode = IN_TABLE;
3457 continue;
3458 case BODY:
3459 case COL:
3460 case COLGROUP:
3461 case HTML:
3462 case TBODY_OR_THEAD_OR_TFOOT:
3463 case TD_OR_TH:
3464 case TR:
3465 errStrayEndTag(name);
3466 break endtagloop;
3467 default:
3468 // fall through to IN_BODY
3469 }
3470 case IN_CELL:
3471 switch (group) {
3472 case TD_OR_TH:
3473 eltPos = findLastInTableScope(name);
3474 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3475 errStrayEndTag(name);
3476 break endtagloop;
3477 }
3478 generateImpliedEndTags();
3479 if (errorHandler != null && !isCurrent(name)) {
3480 errUnclosedElements(eltPos, name);
3481 }
3482 while (currentPtr >= eltPos) {
3483 pop();
3484 }
3485 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
3486 mode = IN_ROW;
3487 break endtagloop;
3488 case TABLE:
3489 case TBODY_OR_THEAD_OR_TFOOT:
3490 case TR:
3491 if (findLastInTableScope(name) == TreeBuilder.NOT_FOUND_ON_STACK) {
3492 assert name == "tbody" || name == "tfoot" || name == "thead" || fragment || isTemplateContents();
3493 errStrayEndTag(name);
3494 break endtagloop;
3495 }
3496 closeTheCell(findLastInTableScopeTdTh());
3497 continue;
3498 case BODY:
3499 case CAPTION:
3500 case COL:
3501 case COLGROUP:
3502 case HTML:
3503 errStrayEndTag(name);
3504 break endtagloop;
3505 default:
3506 // fall through to IN_BODY
3507 }
3508 case FRAMESET_OK:
3509 case IN_BODY:
3510 switch (group) {
3511 case BODY:
3512 if (!isSecondOnStackBody()) {
3513 assert fragment || isTemplateContents();
3514 errStrayEndTag(name);
3515 break endtagloop;
3516 }
3517 assert currentPtr >= 1;
3518 if (errorHandler != null) {
3519 uncloseloop1: for (int i = 2; i <= currentPtr; i++) {
3520 switch (stack[i].getGroup()) {
3521 case DD_OR_DT:
3522 case LI:
3523 case OPTGROUP:
3524 case OPTION: // is this possible?
3525 case P:
3526 case RT_OR_RP:
3527 case TD_OR_TH:
3528 case TBODY_OR_THEAD_OR_TFOOT:
3529 break;
3530 default:
3531 errEndWithUnclosedElements(name);
3532 break uncloseloop1;
3533 }
3534 }
3535 }
3536 mode = AFTER_BODY;
3537 break endtagloop;
3538 case HTML:
3539 if (!isSecondOnStackBody()) {
3540 assert fragment || isTemplateContents();
3541 errStrayEndTag(name);
3542 break endtagloop;
3543 }
3544 if (errorHandler != null) {
3545 uncloseloop2: for (int i = 0; i <= currentPtr; i++) {
3546 switch (stack[i].getGroup()) {
3547 case DD_OR_DT:
3548 case LI:
3549 case P:
3550 case TBODY_OR_THEAD_OR_TFOOT:
3551 case TD_OR_TH:
3552 case BODY:
3553 case HTML:
3554 break;
3555 default:
3556 errEndWithUnclosedElements(name);
3557 break uncloseloop2;
3558 }
3559 }
3560 }
3561 mode = AFTER_BODY;
3562 continue;
3563 case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU:
3564 case UL_OR_OL_OR_DL:
3565 case PRE_OR_LISTING:
3566 case FIELDSET:
3567 case BUTTON:
3568 case ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY:
3569 eltPos = findLastInScope(name);
3570 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3571 errStrayEndTag(name);
3572 } else {
3573 generateImpliedEndTags();
3574 if (errorHandler != null && !isCurrent(name)) {
3575 errUnclosedElements(eltPos, name);
3576 }
3577 while (currentPtr >= eltPos) {
3578 pop();
3579 }
3580 }
3581 break endtagloop;
3582 case FORM:
3583 if (!isTemplateContents()) {
3584 if (formPointer == null) {
3585 errStrayEndTag(name);
3586 break endtagloop;
3587 }
3588 formPointer = null;
3589 eltPos = findLastInScope(name);
3590 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3591 errStrayEndTag(name);
3592 break endtagloop;
3593 }
3594 generateImpliedEndTags();
3595 if (errorHandler != null && !isCurrent(name)) {
3596 errUnclosedElements(eltPos, name);
3597 }
3598 removeFromStack(eltPos);
3599 break endtagloop;
3600 } else {
3601 eltPos = findLastInScope(name);
3602 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3603 errStrayEndTag(name);
3604 break endtagloop;
3605 }
3606 generateImpliedEndTags();
3607 if (errorHandler != null && !isCurrent(name)) {
3608 errUnclosedElements(eltPos, name);
3609 }
3610 while (currentPtr >= eltPos) {
3611 pop();
3612 }
3613 break endtagloop;
3614 }
3615 case P:
3616 eltPos = findLastInButtonScope("p");
3617 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3618 errNoElementToCloseButEndTagSeen("p");
3619 // XXX Can the 'in foreign' case happen anymore?
3620 if (isInForeign()) {
3621 errHtmlStartTagInForeignContext(name);
3622 while (stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
3623 pop();
3624 }
3625 }
3626 appendVoidElementToCurrentMayFoster(
3627 elementName,
3628 HtmlAttributes.EMPTY_ATTRIBUTES);
3629 break endtagloop;
3630 }
3631 generateImpliedEndTagsExceptFor("p");
3632 assert eltPos != TreeBuilder.NOT_FOUND_ON_STACK;
3633 if (errorHandler != null && eltPos != currentPtr) {
3634 errUnclosedElements(eltPos, name);
3635 }
3636 while (currentPtr >= eltPos) {
3637 pop();
3638 }
3639 break endtagloop;
3640 case LI:
3641 eltPos = findLastInListScope(name);
3642 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3643 errNoElementToCloseButEndTagSeen(name);
3644 } else {
3645 generateImpliedEndTagsExceptFor(name);
3646 if (errorHandler != null
3647 && eltPos != currentPtr) {
3648 errUnclosedElements(eltPos, name);
3649 }
3650 while (currentPtr >= eltPos) {
3651 pop();
3652 }
3653 }
3654 break endtagloop;
3655 case DD_OR_DT:
3656 eltPos = findLastInScope(name);
3657 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3658 errNoElementToCloseButEndTagSeen(name);
3659 } else {
3660 generateImpliedEndTagsExceptFor(name);
3661 if (errorHandler != null
3662 && eltPos != currentPtr) {
3663 errUnclosedElements(eltPos, name);
3664 }
3665 while (currentPtr >= eltPos) {
3666 pop();
3667 }
3668 }
3669 break endtagloop;
3670 case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6:
3671 eltPos = findLastInScopeHn();
3672 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3673 errStrayEndTag(name);
3674 } else {
3675 generateImpliedEndTags();
3676 if (errorHandler != null && !isCurrent(name)) {
3677 errUnclosedElements(eltPos, name);
3678 }
3679 while (currentPtr >= eltPos) {
3680 pop();
3681 }
3682 }
3683 break endtagloop;
3684 case OBJECT:
3685 case MARQUEE_OR_APPLET:
3686 eltPos = findLastInScope(name);
3687 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3688 errStrayEndTag(name);
3689 } else {
3690 generateImpliedEndTags();
3691 if (errorHandler != null && !isCurrent(name)) {
3692 errUnclosedElements(eltPos, name);
3693 }
3694 while (currentPtr >= eltPos) {
3695 pop();
3696 }
3697 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
3698 }
3699 break endtagloop;
3700 case BR:
3701 errEndTagBr();
3702 if (isInForeign()) {
3703 errHtmlStartTagInForeignContext(name);
3704 while (stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
3705 pop();
3706 }
3707 }
3708 reconstructTheActiveFormattingElements();
3709 appendVoidElementToCurrentMayFoster(
3710 elementName,
3711 HtmlAttributes.EMPTY_ATTRIBUTES);
3712 break endtagloop;
3713 case TEMPLATE:
3714 // fall through to IN_HEAD;
3715 break;
3716 case AREA_OR_WBR:
3717 // CPPONLY: case MENUITEM:
3718 case PARAM_OR_SOURCE_OR_TRACK:
3719 case EMBED:
3720 case IMG:
3721 case IMAGE:
3722 case INPUT:
3723 case KEYGEN: // XXX??
3724 case HR:
3725 case ISINDEX:
3726 case IFRAME:
3727 case NOEMBED: // XXX???
3728 case NOFRAMES: // XXX??
3729 case SELECT:
3730 case TABLE:
3731 case TEXTAREA: // XXX??
3732 errStrayEndTag(name);
3733 break endtagloop;
3734 case NOSCRIPT:
3735 if (scriptingEnabled) {
3736 errStrayEndTag(name);
3737 break endtagloop;
3738 } else {
3739 // fall through
3740 }
3741 case A:
3742 case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U:
3743 case FONT:
3744 case NOBR:
3745 if (adoptionAgencyEndTag(name)) {
3746 break endtagloop;
3747 }
3748 // else handle like any other tag
3749 default:
3750 if (isCurrent(name)) {
3751 pop();
3752 break endtagloop;
3753 }
3755 eltPos = currentPtr;
3756 for (;;) {
3757 StackNode<T> node = stack[eltPos];
3758 if (node.ns == "http://www.w3.org/1999/xhtml" && node.name == name) {
3759 generateImpliedEndTags();
3760 if (errorHandler != null
3761 && !isCurrent(name)) {
3762 errUnclosedElements(eltPos, name);
3763 }
3764 while (currentPtr >= eltPos) {
3765 pop();
3766 }
3767 break endtagloop;
3768 } else if (node.isSpecial()) {
3769 errStrayEndTag(name);
3770 break endtagloop;
3771 }
3772 eltPos--;
3773 }
3774 }
3775 case IN_HEAD:
3776 switch (group) {
3777 case HEAD:
3778 pop();
3779 mode = AFTER_HEAD;
3780 break endtagloop;
3781 case BR:
3782 case HTML:
3783 case BODY:
3784 pop();
3785 mode = AFTER_HEAD;
3786 continue;
3787 case TEMPLATE:
3788 endTagTemplateInHead();
3789 break endtagloop;
3790 default:
3791 errStrayEndTag(name);
3792 break endtagloop;
3793 }
3794 case IN_HEAD_NOSCRIPT:
3795 switch (group) {
3796 case NOSCRIPT:
3797 pop();
3798 mode = IN_HEAD;
3799 break endtagloop;
3800 case BR:
3801 errStrayEndTag(name);
3802 pop();
3803 mode = IN_HEAD;
3804 continue;
3805 default:
3806 errStrayEndTag(name);
3807 break endtagloop;
3808 }
3809 case IN_COLUMN_GROUP:
3810 switch (group) {
3811 case COLGROUP:
3812 if (currentPtr == 0 || stack[currentPtr].getGroup() ==
3813 TreeBuilder.TEMPLATE) {
3814 assert fragment || isTemplateContents();
3815 errGarbageInColgroup();
3816 break endtagloop;
3817 }
3818 pop();
3819 mode = IN_TABLE;
3820 break endtagloop;
3821 case COL:
3822 errStrayEndTag(name);
3823 break endtagloop;
3824 case TEMPLATE:
3825 endTagTemplateInHead();
3826 break endtagloop;
3827 default:
3828 if (currentPtr == 0 || stack[currentPtr].getGroup() ==
3829 TreeBuilder.TEMPLATE) {
3830 assert fragment || isTemplateContents();
3831 errGarbageInColgroup();
3832 break endtagloop;
3833 }
3834 pop();
3835 mode = IN_TABLE;
3836 continue;
3837 }
3838 case IN_SELECT_IN_TABLE:
3839 switch (group) {
3840 case CAPTION:
3841 case TABLE:
3842 case TBODY_OR_THEAD_OR_TFOOT:
3843 case TR:
3844 case TD_OR_TH:
3845 errEndTagSeenWithSelectOpen(name);
3846 if (findLastInTableScope(name) != TreeBuilder.NOT_FOUND_ON_STACK) {
3847 eltPos = findLastInTableScope("select");
3848 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3849 assert fragment;
3850 break endtagloop; // http://www.w3.org/Bugs/Public/show_bug.cgi?id=8375
3851 }
3852 while (currentPtr >= eltPos) {
3853 pop();
3854 }
3855 resetTheInsertionMode();
3856 continue;
3857 } else {
3858 break endtagloop;
3859 }
3860 default:
3861 // fall through to IN_SELECT
3862 }
3863 case IN_SELECT:
3864 switch (group) {
3865 case OPTION:
3866 if (isCurrent("option")) {
3867 pop();
3868 break endtagloop;
3869 } else {
3870 errStrayEndTag(name);
3871 break endtagloop;
3872 }
3873 case OPTGROUP:
3874 if (isCurrent("option")
3875 && "optgroup" == stack[currentPtr - 1].name) {
3876 pop();
3877 }
3878 if (isCurrent("optgroup")) {
3879 pop();
3880 } else {
3881 errStrayEndTag(name);
3882 }
3883 break endtagloop;
3884 case SELECT:
3885 eltPos = findLastInTableScope("select");
3886 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
3887 assert fragment;
3888 errStrayEndTag(name);
3889 break endtagloop;
3890 }
3891 while (currentPtr >= eltPos) {
3892 pop();
3893 }
3894 resetTheInsertionMode();
3895 break endtagloop;
3896 case TEMPLATE:
3897 endTagTemplateInHead();
3898 break endtagloop;
3899 default:
3900 errStrayEndTag(name);
3901 break endtagloop;
3902 }
3903 case AFTER_BODY:
3904 switch (group) {
3905 case HTML:
3906 if (fragment) {
3907 errStrayEndTag(name);
3908 break endtagloop;
3909 } else {
3910 mode = AFTER_AFTER_BODY;
3911 break endtagloop;
3912 }
3913 default:
3914 errEndTagAfterBody();
3915 mode = framesetOk ? FRAMESET_OK : IN_BODY;
3916 continue;
3917 }
3918 case IN_FRAMESET:
3919 switch (group) {
3920 case FRAMESET:
3921 if (currentPtr == 0) {
3922 assert fragment;
3923 errStrayEndTag(name);
3924 break endtagloop;
3925 }
3926 pop();
3927 if ((!fragment) && !isCurrent("frameset")) {
3928 mode = AFTER_FRAMESET;
3929 }
3930 break endtagloop;
3931 default:
3932 errStrayEndTag(name);
3933 break endtagloop;
3934 }
3935 case AFTER_FRAMESET:
3936 switch (group) {
3937 case HTML:
3938 mode = AFTER_AFTER_FRAMESET;
3939 break endtagloop;
3940 default:
3941 errStrayEndTag(name);
3942 break endtagloop;
3943 }
3944 case INITIAL:
3945 /*
3946 * Parse error.
3947 */
3948 // [NOCPP[
3949 switch (doctypeExpectation) {
3950 case AUTO:
3951 err("End tag seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D.");
3952 break;
3953 case HTML:
3954 // ]NOCPP]
3955 errEndTagSeenWithoutDoctype();
3956 // [NOCPP[
3957 break;
3958 case HTML401_STRICT:
3959 err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D.");
3960 break;
3961 case HTML401_TRANSITIONAL:
3962 err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D.");
3963 break;
3964 case NO_DOCTYPE_ERRORS:
3965 }
3966 // ]NOCPP]
3967 /*
3968 *
3969 * Set the document to quirks mode.
3970 */
3971 documentModeInternal(DocumentMode.QUIRKS_MODE, null, null,
3972 false);
3973 /*
3974 * Then, switch to the root element mode of the tree
3975 * construction stage
3976 */
3977 mode = BEFORE_HTML;
3978 /*
3979 * and reprocess the current token.
3980 */
3981 continue;
3982 case BEFORE_HTML:
3983 switch (group) {
3984 case HEAD:
3985 case BR:
3986 case HTML:
3987 case BODY:
3988 /*
3989 * Create an HTMLElement node with the tag name
3990 * html, in the HTML namespace. Append it to the
3991 * Document object.
3992 */
3993 appendHtmlElementToDocumentAndPush();
3994 /* Switch to the main mode */
3995 mode = BEFORE_HEAD;
3996 /*
3997 * reprocess the current token.
3998 */
3999 continue;
4000 default:
4001 errStrayEndTag(name);
4002 break endtagloop;
4003 }
4004 case BEFORE_HEAD:
4005 switch (group) {
4006 case HEAD:
4007 case BR:
4008 case HTML:
4009 case BODY:
4010 appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES);
4011 mode = IN_HEAD;
4012 continue;
4013 default:
4014 errStrayEndTag(name);
4015 break endtagloop;
4016 }
4017 case AFTER_HEAD:
4018 switch (group) {
4019 case TEMPLATE:
4020 endTagTemplateInHead();
4021 break endtagloop;
4022 case HTML:
4023 case BODY:
4024 case BR:
4025 appendToCurrentNodeAndPushBodyElement();
4026 mode = FRAMESET_OK;
4027 continue;
4028 default:
4029 errStrayEndTag(name);
4030 break endtagloop;
4031 }
4032 case AFTER_AFTER_BODY:
4033 errStrayEndTag(name);
4034 mode = framesetOk ? FRAMESET_OK : IN_BODY;
4035 continue;
4036 case AFTER_AFTER_FRAMESET:
4037 errStrayEndTag(name);
4038 break endtagloop;
4039 case TEXT:
4040 // XXX need to manage insertion point here
4041 pop();
4042 if (originalMode == AFTER_HEAD) {
4043 silentPop();
4044 }
4045 mode = originalMode;
4046 break endtagloop;
4047 }
4048 } // endtagloop
4049 }
4051 private void endTagTemplateInHead() throws SAXException {
4052 int eltPos = findLast("template");
4053 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
4054 errStrayEndTag("template");
4055 return;
4056 }
4057 generateImpliedEndTags();
4058 if (errorHandler != null && !isCurrent("template")) {
4059 errUnclosedElements(eltPos, "template");
4060 }
4061 while (currentPtr >= eltPos) {
4062 pop();
4063 }
4064 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
4065 popTemplateMode();
4066 resetTheInsertionMode();
4067 }
4069 private int findLastInTableScopeOrRootTemplateTbodyTheadTfoot() {
4070 for (int i = currentPtr; i > 0; i--) {
4071 if (stack[i].getGroup() == TreeBuilder.TBODY_OR_THEAD_OR_TFOOT ||
4072 stack[i].getGroup() == TreeBuilder.TEMPLATE) {
4073 return i;
4074 }
4075 }
4076 return 0;
4077 }
4079 private int findLast(@Local String name) {
4080 for (int i = currentPtr; i > 0; i--) {
4081 if (stack[i].ns == "http://www.w3.org/1999/xhtml" && stack[i].name == name) {
4082 return i;
4083 }
4084 }
4085 return TreeBuilder.NOT_FOUND_ON_STACK;
4086 }
4088 private int findLastInTableScope(@Local String name) {
4089 for (int i = currentPtr; i > 0; i--) {
4090 if (stack[i].ns == "http://www.w3.org/1999/xhtml") {
4091 if (stack[i].name == name) {
4092 return i;
4093 } else if (stack[i].name == "table" || stack[i].name == "template") {
4094 return TreeBuilder.NOT_FOUND_ON_STACK;
4095 }
4096 }
4097 }
4098 return TreeBuilder.NOT_FOUND_ON_STACK;
4099 }
4101 private int findLastInButtonScope(@Local String name) {
4102 for (int i = currentPtr; i > 0; i--) {
4103 if (stack[i].ns == "http://www.w3.org/1999/xhtml") {
4104 if (stack[i].name == name) {
4105 return i;
4106 } else if (stack[i].name == "button") {
4107 return TreeBuilder.NOT_FOUND_ON_STACK;
4108 }
4109 }
4111 if (stack[i].isScoping()) {
4112 return TreeBuilder.NOT_FOUND_ON_STACK;
4113 }
4114 }
4115 return TreeBuilder.NOT_FOUND_ON_STACK;
4116 }
4118 private int findLastInScope(@Local String name) {
4119 for (int i = currentPtr; i > 0; i--) {
4120 if (stack[i].ns == "http://www.w3.org/1999/xhtml" && stack[i].name == name) {
4121 return i;
4122 } else if (stack[i].isScoping()) {
4123 return TreeBuilder.NOT_FOUND_ON_STACK;
4124 }
4125 }
4126 return TreeBuilder.NOT_FOUND_ON_STACK;
4127 }
4129 private int findLastInListScope(@Local String name) {
4130 for (int i = currentPtr; i > 0; i--) {
4131 if (stack[i].ns == "http://www.w3.org/1999/xhtml") {
4132 if (stack[i].name == name) {
4133 return i;
4134 } else if (stack[i].name == "ul" || stack[i].name == "ol") {
4135 return TreeBuilder.NOT_FOUND_ON_STACK;
4136 }
4137 }
4139 if (stack[i].isScoping()) {
4140 return TreeBuilder.NOT_FOUND_ON_STACK;
4141 }
4142 }
4143 return TreeBuilder.NOT_FOUND_ON_STACK;
4144 }
4146 private int findLastInScopeHn() {
4147 for (int i = currentPtr; i > 0; i--) {
4148 if (stack[i].getGroup() == TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) {
4149 return i;
4150 } else if (stack[i].isScoping()) {
4151 return TreeBuilder.NOT_FOUND_ON_STACK;
4152 }
4153 }
4154 return TreeBuilder.NOT_FOUND_ON_STACK;
4155 }
4157 private void generateImpliedEndTagsExceptFor(@Local String name)
4158 throws SAXException {
4159 for (;;) {
4160 StackNode<T> node = stack[currentPtr];
4161 switch (node.getGroup()) {
4162 case P:
4163 case LI:
4164 case DD_OR_DT:
4165 case OPTION:
4166 case OPTGROUP:
4167 case RT_OR_RP:
4168 if (node.ns == "http://www.w3.org/1999/xhtml" && node.name == name) {
4169 return;
4170 }
4171 pop();
4172 continue;
4173 default:
4174 return;
4175 }
4176 }
4177 }
4179 private void generateImpliedEndTags() throws SAXException {
4180 for (;;) {
4181 switch (stack[currentPtr].getGroup()) {
4182 case P:
4183 case LI:
4184 case DD_OR_DT:
4185 case OPTION:
4186 case OPTGROUP:
4187 case RT_OR_RP:
4188 pop();
4189 continue;
4190 default:
4191 return;
4192 }
4193 }
4194 }
4196 private boolean isSecondOnStackBody() {
4197 return currentPtr >= 1 && stack[1].getGroup() == TreeBuilder.BODY;
4198 }
4200 private void documentModeInternal(DocumentMode m, String publicIdentifier,
4201 String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
4202 throws SAXException {
4204 if (isSrcdocDocument) {
4205 // Srcdoc documents are always rendered in standards mode.
4206 quirks = false;
4207 if (documentModeHandler != null) {
4208 documentModeHandler.documentMode(
4209 DocumentMode.STANDARDS_MODE
4210 // [NOCPP[
4211 , null, null, false
4212 // ]NOCPP]
4213 );
4214 }
4215 return;
4216 }
4218 quirks = (m == DocumentMode.QUIRKS_MODE);
4219 if (documentModeHandler != null) {
4220 documentModeHandler.documentMode(
4221 m
4222 // [NOCPP[
4223 , publicIdentifier, systemIdentifier,
4224 html4SpecificAdditionalErrorChecks
4225 // ]NOCPP]
4226 );
4227 }
4228 // [NOCPP[
4229 documentMode(m, publicIdentifier, systemIdentifier,
4230 html4SpecificAdditionalErrorChecks);
4231 // ]NOCPP]
4232 }
4234 private boolean isAlmostStandards(String publicIdentifier,
4235 String systemIdentifier) {
4236 if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
4237 "-//w3c//dtd xhtml 1.0 transitional//en", publicIdentifier)) {
4238 return true;
4239 }
4240 if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
4241 "-//w3c//dtd xhtml 1.0 frameset//en", publicIdentifier)) {
4242 return true;
4243 }
4244 if (systemIdentifier != null) {
4245 if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
4246 "-//w3c//dtd html 4.01 transitional//en", publicIdentifier)) {
4247 return true;
4248 }
4249 if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
4250 "-//w3c//dtd html 4.01 frameset//en", publicIdentifier)) {
4251 return true;
4252 }
4253 }
4254 return false;
4255 }
4257 private boolean isQuirky(@Local String name, String publicIdentifier,
4258 String systemIdentifier, boolean forceQuirks) {
4259 if (forceQuirks) {
4260 return true;
4261 }
4262 if (name != HTML_LOCAL) {
4263 return true;
4264 }
4265 if (publicIdentifier != null) {
4266 for (int i = 0; i < TreeBuilder.QUIRKY_PUBLIC_IDS.length; i++) {
4267 if (Portability.lowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString(
4268 TreeBuilder.QUIRKY_PUBLIC_IDS[i], publicIdentifier)) {
4269 return true;
4270 }
4271 }
4272 if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
4273 "-//w3o//dtd w3 html strict 3.0//en//", publicIdentifier)
4274 || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
4275 "-/w3c/dtd html 4.0 transitional/en",
4276 publicIdentifier)
4277 || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
4278 "html", publicIdentifier)) {
4279 return true;
4280 }
4281 }
4282 if (systemIdentifier == null) {
4283 if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
4284 "-//w3c//dtd html 4.01 transitional//en", publicIdentifier)) {
4285 return true;
4286 } else if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
4287 "-//w3c//dtd html 4.01 frameset//en", publicIdentifier)) {
4288 return true;
4289 }
4290 } else if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
4291 "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd",
4292 systemIdentifier)) {
4293 return true;
4294 }
4295 return false;
4296 }
4298 private void closeTheCell(int eltPos) throws SAXException {
4299 generateImpliedEndTags();
4300 if (errorHandler != null && eltPos != currentPtr) {
4301 errUnclosedElementsCell(eltPos);
4302 }
4303 while (currentPtr >= eltPos) {
4304 pop();
4305 }
4306 clearTheListOfActiveFormattingElementsUpToTheLastMarker();
4307 mode = IN_ROW;
4308 return;
4309 }
4311 private int findLastInTableScopeTdTh() {
4312 for (int i = currentPtr; i > 0; i--) {
4313 @Local String name = stack[i].name;
4314 if (stack[i].ns == "http://www.w3.org/1999/xhtml") {
4315 if ("td" == name || "th" == name) {
4316 return i;
4317 } else if (name == "table" || name == "template") {
4318 return TreeBuilder.NOT_FOUND_ON_STACK;
4319 }
4320 }
4321 }
4322 return TreeBuilder.NOT_FOUND_ON_STACK;
4323 }
4325 private void clearStackBackTo(int eltPos) throws SAXException {
4326 int eltGroup = stack[eltPos].getGroup();
4327 while (currentPtr > eltPos) { // > not >= intentional
4328 if (stack[currentPtr].ns == "http://www.w3.org/1999/xhtml"
4329 && stack[currentPtr].getGroup() == TEMPLATE
4330 && (eltGroup == TABLE || eltGroup == TBODY_OR_THEAD_OR_TFOOT|| eltGroup == TR || eltGroup == HTML)) {
4331 return;
4332 }
4333 pop();
4334 }
4335 }
4337 private void resetTheInsertionMode() {
4338 StackNode<T> node;
4339 @Local String name;
4340 @NsUri String ns;
4341 for (int i = currentPtr; i >= 0; i--) {
4342 node = stack[i];
4343 name = node.name;
4344 ns = node.ns;
4345 if (i == 0) {
4346 if (!(contextNamespace == "http://www.w3.org/1999/xhtml" && (contextName == "td" || contextName == "th"))) {
4347 if (fragment) {
4348 // Make sure we are parsing a fragment otherwise the context element doesn't make sense.
4349 name = contextName;
4350 ns = contextNamespace;
4351 }
4352 } else {
4353 mode = framesetOk ? FRAMESET_OK : IN_BODY; // XXX from Hixie's email
4354 return;
4355 }
4356 }
4357 if ("select" == name) {
4358 int ancestorIndex = i;
4359 while (ancestorIndex > 0) {
4360 StackNode<T> ancestor = stack[ancestorIndex--];
4361 if ("http://www.w3.org/1999/xhtml" == ancestor.ns) {
4362 if ("template" == ancestor.name) {
4363 break;
4364 }
4365 if ("table" == ancestor.name) {
4366 mode = IN_SELECT_IN_TABLE;
4367 return;
4368 }
4369 }
4370 }
4371 mode = IN_SELECT;
4372 return;
4373 } else if ("td" == name || "th" == name) {
4374 mode = IN_CELL;
4375 return;
4376 } else if ("tr" == name) {
4377 mode = IN_ROW;
4378 return;
4379 } else if ("tbody" == name || "thead" == name || "tfoot" == name) {
4380 mode = IN_TABLE_BODY;
4381 return;
4382 } else if ("caption" == name) {
4383 mode = IN_CAPTION;
4384 return;
4385 } else if ("colgroup" == name) {
4386 mode = IN_COLUMN_GROUP;
4387 return;
4388 } else if ("table" == name) {
4389 mode = IN_TABLE;
4390 return;
4391 } else if ("http://www.w3.org/1999/xhtml" != ns) {
4392 mode = framesetOk ? FRAMESET_OK : IN_BODY;
4393 return;
4394 } else if ("template" == name) {
4395 assert templateModePtr >= 0;
4396 mode = templateModeStack[templateModePtr];
4397 return;
4398 } else if ("head" == name) {
4399 if (name == contextName) {
4400 mode = framesetOk ? FRAMESET_OK : IN_BODY; // really
4401 } else {
4402 mode = IN_HEAD;
4403 }
4404 return;
4405 } else if ("body" == name) {
4406 mode = framesetOk ? FRAMESET_OK : IN_BODY;
4407 return;
4408 } else if ("frameset" == name) {
4409 // TODO: Fragment case. Add error reporting.
4410 mode = IN_FRAMESET;
4411 return;
4412 } else if ("html" == name) {
4413 if (headPointer == null) {
4414 // TODO: Fragment case. Add error reporting.
4415 mode = BEFORE_HEAD;
4416 } else {
4417 mode = AFTER_HEAD;
4418 }
4419 return;
4420 } else if (i == 0) {
4421 mode = framesetOk ? FRAMESET_OK : IN_BODY;
4422 return;
4423 }
4424 }
4425 }
4427 /**
4428 * @throws SAXException
4429 *
4430 */
4431 private void implicitlyCloseP() throws SAXException {
4432 int eltPos = findLastInButtonScope("p");
4433 if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) {
4434 return;
4435 }
4436 generateImpliedEndTagsExceptFor("p");
4437 if (errorHandler != null && eltPos != currentPtr) {
4438 errUnclosedElementsImplied(eltPos, "p");
4439 }
4440 while (currentPtr >= eltPos) {
4441 pop();
4442 }
4443 }
4445 private boolean debugOnlyClearLastStackSlot() {
4446 stack[currentPtr] = null;
4447 return true;
4448 }
4450 private boolean debugOnlyClearLastListSlot() {
4451 listOfActiveFormattingElements[listPtr] = null;
4452 return true;
4453 }
4455 private void pushTemplateMode(int mode) {
4456 templateModePtr++;
4457 if (templateModePtr == templateModeStack.length) {
4458 int[] newStack = new int[templateModeStack.length + 64];
4459 System.arraycopy(templateModeStack, 0, newStack, 0, templateModeStack.length);
4460 templateModeStack = newStack;
4461 }
4462 templateModeStack[templateModePtr] = mode;
4463 }
4465 @SuppressWarnings("unchecked") private void push(StackNode<T> node) throws SAXException {
4466 currentPtr++;
4467 if (currentPtr == stack.length) {
4468 StackNode<T>[] newStack = new StackNode[stack.length + 64];
4469 System.arraycopy(stack, 0, newStack, 0, stack.length);
4470 stack = newStack;
4471 }
4472 stack[currentPtr] = node;
4473 elementPushed(node.ns, node.popName, node.node);
4474 }
4476 @SuppressWarnings("unchecked") private void silentPush(StackNode<T> node) throws SAXException {
4477 currentPtr++;
4478 if (currentPtr == stack.length) {
4479 StackNode<T>[] newStack = new StackNode[stack.length + 64];
4480 System.arraycopy(stack, 0, newStack, 0, stack.length);
4481 stack = newStack;
4482 }
4483 stack[currentPtr] = node;
4484 }
4486 @SuppressWarnings("unchecked") private void append(StackNode<T> node) {
4487 listPtr++;
4488 if (listPtr == listOfActiveFormattingElements.length) {
4489 StackNode<T>[] newList = new StackNode[listOfActiveFormattingElements.length + 64];
4490 System.arraycopy(listOfActiveFormattingElements, 0, newList, 0,
4491 listOfActiveFormattingElements.length);
4492 listOfActiveFormattingElements = newList;
4493 }
4494 listOfActiveFormattingElements[listPtr] = node;
4495 }
4497 @Inline private void insertMarker() {
4498 append(null);
4499 }
4501 private void clearTheListOfActiveFormattingElementsUpToTheLastMarker() {
4502 while (listPtr > -1) {
4503 if (listOfActiveFormattingElements[listPtr] == null) {
4504 --listPtr;
4505 return;
4506 }
4507 listOfActiveFormattingElements[listPtr].release();
4508 --listPtr;
4509 }
4510 }
4512 @Inline private boolean isCurrent(@Local String name) {
4513 return stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" &&
4514 name == stack[currentPtr].name;
4515 }
4517 private void removeFromStack(int pos) throws SAXException {
4518 if (currentPtr == pos) {
4519 pop();
4520 } else {
4521 fatal();
4522 stack[pos].release();
4523 System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos);
4524 assert debugOnlyClearLastStackSlot();
4525 currentPtr--;
4526 }
4527 }
4529 private void removeFromStack(StackNode<T> node) throws SAXException {
4530 if (stack[currentPtr] == node) {
4531 pop();
4532 } else {
4533 int pos = currentPtr - 1;
4534 while (pos >= 0 && stack[pos] != node) {
4535 pos--;
4536 }
4537 if (pos == -1) {
4538 // dead code?
4539 return;
4540 }
4541 fatal();
4542 node.release();
4543 System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos);
4544 currentPtr--;
4545 }
4546 }
4548 private void removeFromListOfActiveFormattingElements(int pos) {
4549 assert listOfActiveFormattingElements[pos] != null;
4550 listOfActiveFormattingElements[pos].release();
4551 if (pos == listPtr) {
4552 assert debugOnlyClearLastListSlot();
4553 listPtr--;
4554 return;
4555 }
4556 assert pos < listPtr;
4557 System.arraycopy(listOfActiveFormattingElements, pos + 1,
4558 listOfActiveFormattingElements, pos, listPtr - pos);
4559 assert debugOnlyClearLastListSlot();
4560 listPtr--;
4561 }
4563 /**
4564 * Adoption agency algorithm.
4565 *
4566 * @param name subject as described in the specified algorithm.
4567 * @return Returns true if the algorithm has completed and there is nothing remaining to
4568 * be done. Returns false if the algorithm needs to "act as described in the 'any other
4569 * end tag' entry" as described in the specified algorithm.
4570 * @throws SAXException
4571 */
4572 private boolean adoptionAgencyEndTag(@Local String name) throws SAXException {
// Handles an end tag for the formatting element called `name`; the comments
// below refer to this procedure as the AAA algorithm. Element names and
// namespaces are compared by reference (==) throughout.
4573 // This check intends to ensure that for properly nested tags, closing tags will match
4574 // against the stack instead of the listOfActiveFormattingElements.
4575 if (stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" &&
4576 stack[currentPtr].name == name &&
4577 findInListOfActiveFormattingElements(stack[currentPtr]) == -1) {
4578 // If the current element matches the name but isn't on the list of active
4579 // formatting elements, then it is possible that the list was mangled by the Noah's Ark
4580 // clause. In this case, we want to match the end tag against the stack instead of
4581 // proceeding with the AAA algorithm that may match against the list of
4582 // active formatting elements (and possibly mangle the tree in unexpected ways).
4583 pop();
4584 return true;
4585 }
4587 // If you crash around here, perhaps some stack node variable claimed to
4588 // be a weak ref isn't.
// Outer loop: at most 8 passes. A pass that reaches the bottom of the loop
// body has replaced the formatting element with a clone, and the search
// then starts over.
4589 for (int i = 0; i < 8; ++i) {
// Search the list of active formatting elements from its end for an entry
// named `name`. A null entry stops the search and counts as "not found".
4590 int formattingEltListPos = listPtr;
4591 while (formattingEltListPos > -1) {
4592 StackNode<T> listNode = listOfActiveFormattingElements[formattingEltListPos]; // weak ref
4593 if (listNode == null) {
4594 formattingEltListPos = -1;
4595 break;
4596 } else if (listNode.name == name) {
4597 break;
4598 }
4599 formattingEltListPos--;
4600 }
// No matching list entry: return false so that the caller handles the end
// tag itself.
4601 if (formattingEltListPos == -1) {
4602 return false;
4603 }
4604 // this *looks* like a weak ref to the list of formatting elements
4605 StackNode<T> formattingElt = listOfActiveFormattingElements[formattingEltListPos];
// Find the same node object on the stack, walking down from the current
// node. inScope is cleared if any node passed on the way is scoping.
4606 int formattingEltStackPos = currentPtr;
4607 boolean inScope = true;
4608 while (formattingEltStackPos > -1) {
4609 StackNode<T> node = stack[formattingEltStackPos]; // weak ref
4610 if (node == formattingElt) {
4611 break;
4612 } else if (node.isScoping()) {
4613 inScope = false;
4614 }
4615 formattingEltStackPos--;
4616 }
// In the list but not on the stack: report an error and drop the list entry.
4617 if (formattingEltStackPos == -1) {
4618 errNoElementToCloseButEndTagSeen(name);
4619 removeFromListOfActiveFormattingElements(formattingEltListPos);
4620 return true;
4621 }
// On the stack but behind a scoping node: report an error and change nothing.
4622 if (!inScope) {
4623 errNoElementToCloseButEndTagSeen(name);
4624 return true;
4625 }
4626 // stackPos now points to the formatting element and it is in scope
4627 if (formattingEltStackPos != currentPtr) {
4628 errEndTagViolatesNestingRules(name);
4629 }
// The furthest block is the first stack entry above the formatting element
// (scanning towards the current node) for which isSpecial() is true.
4630 int furthestBlockPos = formattingEltStackPos + 1;
4631 while (furthestBlockPos <= currentPtr) {
4632 StackNode<T> node = stack[furthestBlockPos]; // weak ref
4633 if (node.isSpecial()) {
4634 break;
4635 }
4636 furthestBlockPos++;
4637 }
4638 if (furthestBlockPos > currentPtr) {
4639 // no furthest block
// Pop everything down to and including the formatting element, then remove
// its list entry.
4640 while (currentPtr >= formattingEltStackPos) {
4641 pop();
4642 }
4643 removeFromListOfActiveFormattingElements(formattingEltListPos);
4644 return true;
4645 }
// commonAncestor is the stack entry directly below the formatting element.
4646 StackNode<T> commonAncestor = stack[formattingEltStackPos - 1]; // weak ref
4647 StackNode<T> furthestBlock = stack[furthestBlockPos]; // weak ref
4648 // detachFromParent(furthestBlock.node); XXX AAA CHANGE
// bookmark is the list index at which the clone of the formatting element
// is inserted at the end of this pass.
4649 int bookmark = formattingEltListPos;
4650 int nodePos = furthestBlockPos;
4651 StackNode<T> lastNode = furthestBlock; // weak ref
// Inner loop: walk the stack downwards from the furthest block until the
// formatting element is reached; j counts the iterations.
4652 int j = 0;
4653 for (;;) {
4654 ++j;
4655 nodePos--;
4656 if (nodePos == formattingEltStackPos) {
4657 break;
4658 }
4659 StackNode<T> node = stack[nodePos]; // weak ref
4660 int nodeListPos = findInListOfActiveFormattingElements(node);
// From the fourth iteration on, a node that is still in the list is removed
// from it and is then handled like a node that was never in the list.
4662 if (j > 3 && nodeListPos != -1) {
4663 removeFromListOfActiveFormattingElements(nodeListPos);
4665 // Adjust the indices into the list to account
4666 // for the removal of nodeListPos.
4667 if (nodeListPos <= formattingEltListPos) {
4668 formattingEltListPos--;
4669 }
4670 if (nodeListPos <= bookmark) {
4671 bookmark--;
4672 }
4674 // Update position to reflect removal from list.
4675 nodeListPos = -1;
4676 }
// A node that is not in the list is removed from the stack. The entries
// above it shift down by one, so furthestBlockPos is decremented to match.
4678 if (nodeListPos == -1) {
4679 assert formattingEltStackPos < nodePos;
// NOTE(review): bookmark is an index into the list while nodePos is an
// index into the stack; confirm that this comparison is intended.
4680 assert bookmark < nodePos;
4681 assert furthestBlockPos > nodePos;
4682 removeFromStack(nodePos); // node is now a bad pointer in C++
4683 furthestBlockPos--;
4684 continue;
4685 }
4686 // now node is both on stack and in the list
// NOTE(review): nodePos starts at furthestBlockPos and is decremented before
// use, and every stack removal above decrements furthestBlockPos as well, so
// nodePos looks to be always smaller than furthestBlockPos here. This branch
// may therefore never run - confirm before relying on the bookmark update.
4687 if (nodePos == furthestBlockPos) {
4688 bookmark = nodeListPos + 1;
4689 }
4690 // if (hasChildren(node.node)) { XXX AAA CHANGE
4691 assert node == listOfActiveFormattingElements[nodeListPos];
4692 assert node == stack[nodePos];
// Replace node, both on the stack and in the list, with a new stack node
// wrapping a freshly created element that gets a copy of the attributes.
4693 T clone = createElement("http://www.w3.org/1999/xhtml",
4694 node.name, node.attributes.cloneAttributes(null));
4695 StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
4696 node.name, clone, node.popName, node.attributes
4697 // [NOCPP[
4698 , node.getLocator()
4699 // ]NOCPP]
4700 ); // creation ownership goes to stack
4701 node.dropAttributes(); // adopt ownership to newNode
4702 stack[nodePos] = newNode;
4703 newNode.retain(); // retain for list
4704 listOfActiveFormattingElements[nodeListPos] = newNode;
4705 node.release(); // release from stack
4706 node.release(); // release from list
4707 node = newNode;
4708 // } XXX AAA CHANGE
// Move lastNode's element under the new clone, then continue with the
// clone as lastNode.
4709 detachFromParent(lastNode.node);
4710 appendElement(lastNode.node, node.node);
4711 lastNode = node;
4712 }
// Re-attach lastNode's element: foster-parent it if the common ancestor is
// foster parenting, otherwise append it to the common ancestor.
4713 if (commonAncestor.isFosterParenting()) {
4714 fatal();
4715 detachFromParent(lastNode.node);
4716 insertIntoFosterParent(lastNode.node);
4717 } else {
4718 detachFromParent(lastNode.node);
4719 appendElement(lastNode.node, commonAncestor.node);
4720 }
// Create a clone of the formatting element. It takes over the furthest
// block's children and is then appended to the furthest block.
4721 T clone = createElement("http://www.w3.org/1999/xhtml",
4722 formattingElt.name,
4723 formattingElt.attributes.cloneAttributes(null));
4724 StackNode<T> formattingClone = new StackNode<T>(
4725 formattingElt.getFlags(), formattingElt.ns,
4726 formattingElt.name, clone, formattingElt.popName,
4727 formattingElt.attributes
4728 // [NOCPP[
4729 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
4730 // ]NOCPP]
4731 ); // Ownership transfers to stack below
4732 formattingElt.dropAttributes(); // transfer ownership to
4733 // formattingClone
4734 appendChildrenToNewParent(furthestBlock.node, clone);
4735 appendElement(clone, furthestBlock.node);
// Swap the old formatting element for its clone: in the list at the
// bookmark, and on the stack directly above the furthest block.
4736 removeFromListOfActiveFormattingElements(formattingEltListPos);
4737 insertIntoListOfActiveFormattingElements(formattingClone, bookmark);
4738 assert formattingEltStackPos < furthestBlockPos;
4739 removeFromStack(formattingEltStackPos);
4740 // furthestBlockPos is now off by one and points to the slot after
4741 // it
4742 insertIntoStack(formattingClone, furthestBlockPos);
4743 }
// All 8 passes were used up; the end tag still counts as handled.
4744 return true;
4745 }
4747 private void insertIntoStack(StackNode<T> node, int position)
4748 throws SAXException {
4749 assert currentPtr + 1 < stack.length;
4750 assert position <= currentPtr + 1;
4751 if (position == currentPtr + 1) {
4752 push(node);
4753 } else {
4754 System.arraycopy(stack, position, stack, position + 1,
4755 (currentPtr - position) + 1);
4756 currentPtr++;
4757 stack[position] = node;
4758 }
4759 }
4761 private void insertIntoListOfActiveFormattingElements(
4762 StackNode<T> formattingClone, int bookmark) {
4763 formattingClone.retain();
4764 assert listPtr + 1 < listOfActiveFormattingElements.length;
4765 if (bookmark <= listPtr) {
4766 System.arraycopy(listOfActiveFormattingElements, bookmark,
4767 listOfActiveFormattingElements, bookmark + 1,
4768 (listPtr - bookmark) + 1);
4769 }
4770 listPtr++;
4771 listOfActiveFormattingElements[bookmark] = formattingClone;
4772 }
4774 private int findInListOfActiveFormattingElements(StackNode<T> node) {
4775 for (int i = listPtr; i >= 0; i--) {
4776 if (node == listOfActiveFormattingElements[i]) {
4777 return i;
4778 }
4779 }
4780 return -1;
4781 }
4783 private int findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker(
4784 @Local String name) {
4785 for (int i = listPtr; i >= 0; i--) {
4786 StackNode<T> node = listOfActiveFormattingElements[i];
4787 if (node == null) {
4788 return -1;
4789 } else if (node.name == name) {
4790 return i;
4791 }
4792 }
4793 return -1;
4794 }
4797 private void maybeForgetEarlierDuplicateFormattingElement(
4798 @Local String name, HtmlAttributes attributes) throws SAXException {
4799 int candidate = -1;
4800 int count = 0;
4801 for (int i = listPtr; i >= 0; i--) {
4802 StackNode<T> node = listOfActiveFormattingElements[i];
4803 if (node == null) {
4804 break;
4805 }
4806 if (node.name == name && node.attributes.equalsAnother(attributes)) {
4807 candidate = i;
4808 ++count;
4809 }
4810 }
4811 if (count >= 3) {
4812 removeFromListOfActiveFormattingElements(candidate);
4813 }
4814 }
4816 private int findLastOrRoot(@Local String name) {
4817 for (int i = currentPtr; i > 0; i--) {
4818 if (stack[i].ns == "http://www.w3.org/1999/xhtml" && stack[i].name == name) {
4819 return i;
4820 }
4821 }
4822 return 0;
4823 }
4825 private int findLastOrRoot(int group) {
4826 for (int i = currentPtr; i > 0; i--) {
4827 if (stack[i].getGroup() == group) {
4828 return i;
4829 }
4830 }
4831 return 0;
4832 }
4834 /**
4835 * Attempt to add attribute to the body element.
4836 * @param attributes the attributes
4837 * @return <code>true</code> iff the attributes were added
4838 * @throws SAXException
4839 */
4840 private boolean addAttributesToBody(HtmlAttributes attributes)
4841 throws SAXException {
4842 // [NOCPP[
4843 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
4844 // ]NOCPP]
4845 if (currentPtr >= 1) {
4846 StackNode<T> body = stack[1];
4847 if (body.getGroup() == TreeBuilder.BODY) {
4848 addAttributesToElement(body.node, attributes);
4849 return true;
4850 }
4851 }
4852 return false;
4853 }
4855 private void addAttributesToHtml(HtmlAttributes attributes)
4856 throws SAXException {
4857 // [NOCPP[
4858 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
4859 // ]NOCPP]
4860 addAttributesToElement(stack[0].node, attributes);
4861 }
4863 private void pushHeadPointerOntoStack() throws SAXException {
4864 assert headPointer != null;
4865 assert mode == AFTER_HEAD;
4866 fatal();
4867 silentPush(new StackNode<T>(ElementName.HEAD, headPointer
4868 // [NOCPP[
4869 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
4870 // ]NOCPP]
4871 ));
4872 }
4874 /**
4875 * @throws SAXException
4876 *
4877 */
4878 private void reconstructTheActiveFormattingElements() throws SAXException {
4879 if (listPtr == -1) {
4880 return;
4881 }
4882 StackNode<T> mostRecent = listOfActiveFormattingElements[listPtr];
4883 if (mostRecent == null || isInStack(mostRecent)) {
4884 return;
4885 }
4886 int entryPos = listPtr;
4887 for (;;) {
4888 entryPos--;
4889 if (entryPos == -1) {
4890 break;
4891 }
4892 if (listOfActiveFormattingElements[entryPos] == null) {
4893 break;
4894 }
4895 if (isInStack(listOfActiveFormattingElements[entryPos])) {
4896 break;
4897 }
4898 }
4899 while (entryPos < listPtr) {
4900 entryPos++;
4901 StackNode<T> entry = listOfActiveFormattingElements[entryPos];
4902 T clone = createElement("http://www.w3.org/1999/xhtml", entry.name,
4903 entry.attributes.cloneAttributes(null));
4904 StackNode<T> entryClone = new StackNode<T>(entry.getFlags(),
4905 entry.ns, entry.name, clone, entry.popName,
4906 entry.attributes
4907 // [NOCPP[
4908 , entry.getLocator()
4909 // ]NOCPP]
4910 );
4911 entry.dropAttributes(); // transfer ownership to entryClone
4912 StackNode<T> currentNode = stack[currentPtr];
4913 if (currentNode.isFosterParenting()) {
4914 insertIntoFosterParent(clone);
4915 } else {
4916 appendElement(clone, currentNode.node);
4917 }
4918 push(entryClone);
4919 // stack takes ownership of the local variable
4920 listOfActiveFormattingElements[entryPos] = entryClone;
4921 // overwriting the old entry on the list, so release & retain
4922 entry.release();
4923 entryClone.retain();
4924 }
4925 }
4927 private void insertIntoFosterParent(T child) throws SAXException {
4928 int tablePos = findLastOrRoot(TreeBuilder.TABLE);
4929 int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE);
4931 if (templatePos >= tablePos) {
4932 appendElement(child, stack[templatePos].node);
4933 return;
4934 }
4936 StackNode<T> node = stack[tablePos];
4937 insertFosterParentedChild(child, node.node, stack[tablePos - 1].node);
4938 }
4940 private boolean isInStack(StackNode<T> node) {
4941 for (int i = currentPtr; i >= 0; i--) {
4942 if (stack[i] == node) {
4943 return true;
4944 }
4945 }
4946 return false;
4947 }
4949 private void popTemplateMode() {
4950 templateModePtr--;
4951 }
4953 private void pop() throws SAXException {
4954 StackNode<T> node = stack[currentPtr];
4955 assert debugOnlyClearLastStackSlot();
4956 currentPtr--;
4957 elementPopped(node.ns, node.popName, node.node);
4958 node.release();
4959 }
4961 private void silentPop() throws SAXException {
4962 StackNode<T> node = stack[currentPtr];
4963 assert debugOnlyClearLastStackSlot();
4964 currentPtr--;
4965 node.release();
4966 }
4968 private void popOnEof() throws SAXException {
4969 StackNode<T> node = stack[currentPtr];
4970 assert debugOnlyClearLastStackSlot();
4971 currentPtr--;
4972 markMalformedIfScript(node.node);
4973 elementPopped(node.ns, node.popName, node.node);
4974 node.release();
4975 }
4977 // [NOCPP[
4978 private void checkAttributes(HtmlAttributes attributes, @NsUri String ns)
4979 throws SAXException {
4980 if (errorHandler != null) {
4981 int len = attributes.getXmlnsLength();
4982 for (int i = 0; i < len; i++) {
4983 AttributeName name = attributes.getXmlnsAttributeName(i);
4984 if (name == AttributeName.XMLNS) {
4985 if (html4) {
4986 err("Attribute \u201Cxmlns\u201D not allowed here. (HTML4-only error.)");
4987 } else {
4988 String xmlns = attributes.getXmlnsValue(i);
4989 if (!ns.equals(xmlns)) {
4990 err("Bad value \u201C"
4991 + xmlns
4992 + "\u201D for the attribute \u201Cxmlns\u201D (only \u201C"
4993 + ns + "\u201D permitted here).");
4994 switch (namePolicy) {
4995 case ALTER_INFOSET:
4996 // fall through
4997 case ALLOW:
4998 warn("Attribute \u201Cxmlns\u201D is not serializable as XML 1.0.");
4999 break;
5000 case FATAL:
5001 fatal("Attribute \u201Cxmlns\u201D is not serializable as XML 1.0.");
5002 break;
5003 }
5004 }
5005 }
5006 } else if (ns != "http://www.w3.org/1999/xhtml"
5007 && name == AttributeName.XMLNS_XLINK) {
5008 String xmlns = attributes.getXmlnsValue(i);
5009 if (!"http://www.w3.org/1999/xlink".equals(xmlns)) {
5010 err("Bad value \u201C"
5011 + xmlns
5012 + "\u201D for the attribute \u201Cxmlns:link\u201D (only \u201Chttp://www.w3.org/1999/xlink\u201D permitted here).");
5013 switch (namePolicy) {
5014 case ALTER_INFOSET:
5015 // fall through
5016 case ALLOW:
5017 warn("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
5018 break;
5019 case FATAL:
5020 fatal("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics.");
5021 break;
5022 }
5023 }
5024 } else {
5025 err("Attribute \u201C" + attributes.getXmlnsLocalName(i)
5026 + "\u201D not allowed here.");
5027 switch (namePolicy) {
5028 case ALTER_INFOSET:
5029 // fall through
5030 case ALLOW:
5031 warn("Attribute with the local name \u201C"
5032 + attributes.getXmlnsLocalName(i)
5033 + "\u201D is not serializable as XML 1.0.");
5034 break;
5035 case FATAL:
5036 fatal("Attribute with the local name \u201C"
5037 + attributes.getXmlnsLocalName(i)
5038 + "\u201D is not serializable as XML 1.0.");
5039 break;
5040 }
5041 }
5042 }
5043 }
5044 attributes.processNonNcNames(this, namePolicy);
5045 }
5047 private String checkPopName(@Local String name) throws SAXException {
5048 if (NCName.isNCName(name)) {
5049 return name;
5050 } else {
5051 switch (namePolicy) {
5052 case ALLOW:
5053 warn("Element name \u201C" + name
5054 + "\u201D cannot be represented as XML 1.0.");
5055 return name;
5056 case ALTER_INFOSET:
5057 warn("Element name \u201C" + name
5058 + "\u201D cannot be represented as XML 1.0.");
5059 return NCName.escapeName(name);
5060 case FATAL:
5061 fatal("Element name \u201C" + name
5062 + "\u201D cannot be represented as XML 1.0.");
5063 }
5064 }
5065 return null; // keep compiler happy
5066 }
5068 // ]NOCPP]
5070 private void appendHtmlElementToDocumentAndPush(HtmlAttributes attributes)
5071 throws SAXException {
5072 // [NOCPP[
5073 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5074 // ]NOCPP]
5075 T elt = createHtmlElementSetAsRoot(attributes);
5076 StackNode<T> node = new StackNode<T>(ElementName.HTML,
5077 elt
5078 // [NOCPP[
5079 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
5080 // ]NOCPP]
5081 );
5082 push(node);
5083 }
5085 private void appendHtmlElementToDocumentAndPush() throws SAXException {
5086 appendHtmlElementToDocumentAndPush(tokenizer.emptyAttributes());
5087 }
5089 private void appendToCurrentNodeAndPushHeadElement(HtmlAttributes attributes)
5090 throws SAXException {
5091 // [NOCPP[
5092 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5093 // ]NOCPP]
5094 T elt = createElement("http://www.w3.org/1999/xhtml", "head",
5095 attributes);
5096 appendElement(elt, stack[currentPtr].node);
5097 headPointer = elt;
5098 StackNode<T> node = new StackNode<T>(ElementName.HEAD,
5099 elt
5100 // [NOCPP[
5101 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
5102 // ]NOCPP]
5103 );
5104 push(node);
5105 }
5107 private void appendToCurrentNodeAndPushBodyElement(HtmlAttributes attributes)
5108 throws SAXException {
5109 appendToCurrentNodeAndPushElement(ElementName.BODY,
5110 attributes);
5111 }
5113 private void appendToCurrentNodeAndPushBodyElement() throws SAXException {
5114 appendToCurrentNodeAndPushBodyElement(tokenizer.emptyAttributes());
5115 }
5117 private void appendToCurrentNodeAndPushFormElementMayFoster(
5118 HtmlAttributes attributes) throws SAXException {
5119 // [NOCPP[
5120 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5121 // ]NOCPP]
5122 T elt = createElement("http://www.w3.org/1999/xhtml", "form",
5123 attributes);
5125 if (!isTemplateContents()) {
5126 formPointer = elt;
5127 }
5129 StackNode<T> current = stack[currentPtr];
5130 if (current.isFosterParenting()) {
5131 fatal();
5132 insertIntoFosterParent(elt);
5133 } else {
5134 appendElement(elt, current.node);
5135 }
5136 StackNode<T> node = new StackNode<T>(ElementName.FORM,
5137 elt
5138 // [NOCPP[
5139 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
5140 // ]NOCPP]
5141 );
5142 push(node);
5143 }
5145 private void appendToCurrentNodeAndPushFormattingElementMayFoster(
5146 ElementName elementName, HtmlAttributes attributes)
5147 throws SAXException {
5148 // [NOCPP[
5149 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5150 // ]NOCPP]
5151 // This method can't be called for custom elements
5152 HtmlAttributes clone = attributes.cloneAttributes(null);
5153 // Attributes must not be read after calling createElement, because
5154 // createElement may delete attributes in C++.
5155 T elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes);
5156 StackNode<T> current = stack[currentPtr];
5157 if (current.isFosterParenting()) {
5158 fatal();
5159 insertIntoFosterParent(elt);
5160 } else {
5161 appendElement(elt, current.node);
5162 }
5163 StackNode<T> node = new StackNode<T>(elementName, elt, clone
5164 // [NOCPP[
5165 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
5166 // ]NOCPP]
5167 );
5168 push(node);
5169 append(node);
5170 node.retain(); // append doesn't retain itself
5171 }
5173 private void appendToCurrentNodeAndPushElement(ElementName elementName,
5174 HtmlAttributes attributes)
5175 throws SAXException {
5176 // [NOCPP[
5177 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5178 // ]NOCPP]
5179 // This method can't be called for custom elements
5180 T elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes);
5181 appendElement(elt, stack[currentPtr].node);
5182 if (ElementName.TEMPLATE == elementName) {
5183 elt = getDocumentFragmentForTemplate(elt);
5184 }
5185 StackNode<T> node = new StackNode<T>(elementName, elt
5186 // [NOCPP[
5187 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
5188 // ]NOCPP]
5189 );
5190 push(node);
5191 }
5193 private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName,
5194 HtmlAttributes attributes)
5195 throws SAXException {
5196 @Local String popName = elementName.name;
5197 // [NOCPP[
5198 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5199 if (elementName.isCustom()) {
5200 popName = checkPopName(popName);
5201 }
5202 // ]NOCPP]
5203 T elt = createElement("http://www.w3.org/1999/xhtml", popName, attributes);
5204 StackNode<T> current = stack[currentPtr];
5205 if (current.isFosterParenting()) {
5206 fatal();
5207 insertIntoFosterParent(elt);
5208 } else {
5209 appendElement(elt, current.node);
5210 }
5211 StackNode<T> node = new StackNode<T>(elementName, elt, popName
5212 // [NOCPP[
5213 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
5214 // ]NOCPP]
5215 );
5216 push(node);
5217 }
5219 private void appendToCurrentNodeAndPushElementMayFosterMathML(
5220 ElementName elementName, HtmlAttributes attributes)
5221 throws SAXException {
5222 @Local String popName = elementName.name;
5223 // [NOCPP[
5224 checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML");
5225 if (elementName.isCustom()) {
5226 popName = checkPopName(popName);
5227 }
5228 // ]NOCPP]
5229 boolean markAsHtmlIntegrationPoint = false;
5230 if (ElementName.ANNOTATION_XML == elementName
5231 && annotationXmlEncodingPermitsHtml(attributes)) {
5232 markAsHtmlIntegrationPoint = true;
5233 }
5234 // Attributes must not be read after calling createElement(), since
5235 // createElement may delete the object in C++.
5236 T elt = createElement("http://www.w3.org/1998/Math/MathML", popName,
5237 attributes);
5238 StackNode<T> current = stack[currentPtr];
5239 if (current.isFosterParenting()) {
5240 fatal();
5241 insertIntoFosterParent(elt);
5242 } else {
5243 appendElement(elt, current.node);
5244 }
5245 StackNode<T> node = new StackNode<T>(elementName, elt, popName,
5246 markAsHtmlIntegrationPoint
5247 // [NOCPP[
5248 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
5249 // ]NOCPP]
5250 );
5251 push(node);
5252 }
5254 // [NOCPP[
5255 T getDocumentFragmentForTemplate(T template) {
5256 return template;
5257 }
5259 T getFormPointerForContext(T context) {
5260 return null;
5261 }
5262 // ]NOCPP]
5264 private boolean annotationXmlEncodingPermitsHtml(HtmlAttributes attributes) {
5265 String encoding = attributes.getValue(AttributeName.ENCODING);
5266 if (encoding == null) {
5267 return false;
5268 }
5269 return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
5270 "application/xhtml+xml", encoding)
5271 || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
5272 "text/html", encoding);
5273 }
5275 private void appendToCurrentNodeAndPushElementMayFosterSVG(
5276 ElementName elementName, HtmlAttributes attributes)
5277 throws SAXException {
5278 @Local String popName = elementName.camelCaseName;
5279 // [NOCPP[
5280 checkAttributes(attributes, "http://www.w3.org/2000/svg");
5281 if (elementName.isCustom()) {
5282 popName = checkPopName(popName);
5283 }
5284 // ]NOCPP]
5285 T elt = createElement("http://www.w3.org/2000/svg", popName, attributes);
5286 StackNode<T> current = stack[currentPtr];
5287 if (current.isFosterParenting()) {
5288 fatal();
5289 insertIntoFosterParent(elt);
5290 } else {
5291 appendElement(elt, current.node);
5292 }
5293 StackNode<T> node = new StackNode<T>(elementName, popName, elt
5294 // [NOCPP[
5295 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
5296 // ]NOCPP]
5297 );
5298 push(node);
5299 }
5301 private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName,
5302 HtmlAttributes attributes, T form)
5303 throws SAXException {
5304 // [NOCPP[
5305 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5306 // ]NOCPP]
5307 // Can't be called for custom elements
5308 T elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes,
5309 form == null || fragment || isTemplateContents() ? null : form);
5310 StackNode<T> current = stack[currentPtr];
5311 if (current.isFosterParenting()) {
5312 fatal();
5313 insertIntoFosterParent(elt);
5314 } else {
5315 appendElement(elt, current.node);
5316 }
5317 StackNode<T> node = new StackNode<T>(elementName, elt
5318 // [NOCPP[
5319 , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer)
5320 // ]NOCPP]
5321 );
5322 push(node);
5323 }
5325 private void appendVoidElementToCurrentMayFoster(
5326 @Local String name, HtmlAttributes attributes, T form) throws SAXException {
5327 // [NOCPP[
5328 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5329 // ]NOCPP]
5330 // Can't be called for custom elements
5331 T elt = createElement("http://www.w3.org/1999/xhtml", name, attributes,
5332 form == null || fragment || isTemplateContents() ? null : form);
5333 StackNode<T> current = stack[currentPtr];
5334 if (current.isFosterParenting()) {
5335 fatal();
5336 insertIntoFosterParent(elt);
5337 } else {
5338 appendElement(elt, current.node);
5339 }
5340 elementPushed("http://www.w3.org/1999/xhtml", name, elt);
5341 elementPopped("http://www.w3.org/1999/xhtml", name, elt);
5342 }
5344 private void appendVoidElementToCurrentMayFoster(
5345 ElementName elementName, HtmlAttributes attributes)
5346 throws SAXException {
5347 @Local String popName = elementName.name;
5348 // [NOCPP[
5349 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5350 if (elementName.isCustom()) {
5351 popName = checkPopName(popName);
5352 }
5353 // ]NOCPP]
5354 T elt = createElement("http://www.w3.org/1999/xhtml", popName, attributes);
5355 StackNode<T> current = stack[currentPtr];
5356 if (current.isFosterParenting()) {
5357 fatal();
5358 insertIntoFosterParent(elt);
5359 } else {
5360 appendElement(elt, current.node);
5361 }
5362 elementPushed("http://www.w3.org/1999/xhtml", popName, elt);
5363 elementPopped("http://www.w3.org/1999/xhtml", popName, elt);
5364 }
5366 private void appendVoidElementToCurrentMayFosterSVG(
5367 ElementName elementName, HtmlAttributes attributes)
5368 throws SAXException {
5369 @Local String popName = elementName.camelCaseName;
5370 // [NOCPP[
5371 checkAttributes(attributes, "http://www.w3.org/2000/svg");
5372 if (elementName.isCustom()) {
5373 popName = checkPopName(popName);
5374 }
5375 // ]NOCPP]
5376 T elt = createElement("http://www.w3.org/2000/svg", popName, attributes);
5377 StackNode<T> current = stack[currentPtr];
5378 if (current.isFosterParenting()) {
5379 fatal();
5380 insertIntoFosterParent(elt);
5381 } else {
5382 appendElement(elt, current.node);
5383 }
5384 elementPushed("http://www.w3.org/2000/svg", popName, elt);
5385 elementPopped("http://www.w3.org/2000/svg", popName, elt);
5386 }
5388 private void appendVoidElementToCurrentMayFosterMathML(
5389 ElementName elementName, HtmlAttributes attributes)
5390 throws SAXException {
5391 @Local String popName = elementName.name;
5392 // [NOCPP[
5393 checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML");
5394 if (elementName.isCustom()) {
5395 popName = checkPopName(popName);
5396 }
5397 // ]NOCPP]
5398 T elt = createElement("http://www.w3.org/1998/Math/MathML", popName, attributes);
5399 StackNode<T> current = stack[currentPtr];
5400 if (current.isFosterParenting()) {
5401 fatal();
5402 insertIntoFosterParent(elt);
5403 } else {
5404 appendElement(elt, current.node);
5405 }
5406 elementPushed("http://www.w3.org/1998/Math/MathML", popName, elt);
5407 elementPopped("http://www.w3.org/1998/Math/MathML", popName, elt);
5408 }
5410 private void appendVoidElementToCurrent(
5411 @Local String name, HtmlAttributes attributes, T form) throws SAXException {
5412 // [NOCPP[
5413 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5414 // ]NOCPP]
5415 // Can't be called for custom elements
5416 T elt = createElement("http://www.w3.org/1999/xhtml", name, attributes,
5417 form == null || fragment || isTemplateContents() ? null : form);
5418 StackNode<T> current = stack[currentPtr];
5419 appendElement(elt, current.node);
5420 elementPushed("http://www.w3.org/1999/xhtml", name, elt);
5421 elementPopped("http://www.w3.org/1999/xhtml", name, elt);
5422 }
5424 private void appendVoidFormToCurrent(HtmlAttributes attributes) throws SAXException {
5425 // [NOCPP[
5426 checkAttributes(attributes, "http://www.w3.org/1999/xhtml");
5427 // ]NOCPP]
5428 T elt = createElement("http://www.w3.org/1999/xhtml", "form",
5429 attributes);
5430 formPointer = elt;
5431 // ownership transferred to form pointer
5432 StackNode<T> current = stack[currentPtr];
5433 appendElement(elt, current.node);
5434 elementPushed("http://www.w3.org/1999/xhtml", "form", elt);
5435 elementPopped("http://www.w3.org/1999/xhtml", "form", elt);
5436 }
5438 // [NOCPP[
5440 private final void accumulateCharactersForced(@Const @NoLength char[] buf,
5441 int start, int length) throws SAXException {
5442 int newLen = charBufferLen + length;
5443 if (newLen > charBuffer.length) {
5444 char[] newBuf = new char[newLen];
5445 System.arraycopy(charBuffer, 0, newBuf, 0, charBufferLen);
5446 charBuffer = newBuf;
5447 }
5448 System.arraycopy(buf, start, charBuffer, charBufferLen, length);
5449 charBufferLen = newLen;
5450 }
5452 // ]NOCPP]
5454 protected void accumulateCharacters(@Const @NoLength char[] buf, int start,
5455 int length) throws SAXException {
5456 appendCharacters(stack[currentPtr].node, buf, start, length);
5457 }
5459 // ------------------------------- //
5461 protected final void requestSuspension() {
5462 tokenizer.requestSuspension();
5463 }
/**
 * Creates an element; implemented by subclasses. Within this class the
 * returned value is subsequently handed to <code>appendElement</code> or
 * <code>insertIntoFosterParent</code> and wrapped in a stack node.
 *
 * @param ns the namespace URI of the element
 * @param name the local name of the element
 * @param attributes the attributes for the element; callers in this class
 *        avoid reading them after this call because the implementation
 *        may delete them in C++
 * @return the created element
 * @throws SAXException if the implementation fails
 */
5465 protected abstract T createElement(@NsUri String ns, @Local String name,
5466 HtmlAttributes attributes) throws SAXException;
5468 protected T createElement(@NsUri String ns, @Local String name,
5469 HtmlAttributes attributes, T form) throws SAXException {
5470 return createElement("http://www.w3.org/1999/xhtml", name, attributes);
5471 }
/**
 * Creates the root element; implemented by subclasses. This class pushes
 * the returned value onto the stack as the <code>HTML</code> node.
 * Presumably the implementation also installs it as the document root,
 * as the name suggests; confirm against the subclasses.
 */
5473 protected abstract T createHtmlElementSetAsRoot(HtmlAttributes attributes)
5474 throws SAXException;
/**
 * Implemented by subclasses. In this class it is called on an element
 * immediately before that element is attached elsewhere with
 * <code>appendElement</code> or <code>insertIntoFosterParent</code>.
 */
5476 protected abstract void detachFromParent(T element) throws SAXException;
/**
 * Implemented by subclasses. In the visible part of this class it is only
 * referenced from commented-out code.
 */
5478 protected abstract boolean hasChildren(T element) throws SAXException;
/**
 * Implemented by subclasses; called with a child and the element that is
 * to become its parent.
 */
5480 protected abstract void appendElement(T child, T newParent)
5481 throws SAXException;
/**
 * Implemented by subclasses. The adoption agency code in this class calls
 * it with the furthest block's element as <code>oldParent</code> and a
 * newly created clone as <code>newParent</code>.
 */
5483 protected abstract void appendChildrenToNewParent(T oldParent, T newParent)
5484 throws SAXException;
/**
 * Implemented by subclasses. <code>insertIntoFosterParent</code> calls it
 * with the element of the table found on the stack as <code>table</code>
 * and the element of the stack entry directly below it as
 * <code>stackParent</code>.
 */
5486 protected abstract void insertFosterParentedChild(T child, T table,
5487 T stackParent) throws SAXException;
/**
 * Implemented by subclasses. Presumably the character-data counterpart of
 * <code>insertFosterParentedChild</code>; its callers are outside the
 * visible part of this file.
 */
5489 protected abstract void insertFosterParentedCharacters(
5490 @NoLength char[] buf, int start, int length, T table, T stackParent)
5491 throws SAXException;
/**
 * Implemented by subclasses. <code>accumulateCharacters</code> calls it
 * with the current node's element as <code>parent</code>.
 */
5493 protected abstract void appendCharacters(T parent, @NoLength char[] buf,
5494 int start, int length) throws SAXException;
/**
 * Implemented by subclasses; its callers are outside the visible part of
 * this file.
 */
5496 protected abstract void appendIsindexPrompt(T parent) throws SAXException;
/**
 * Implemented by subclasses; its callers are outside the visible part of
 * this file.
 */
5498 protected abstract void appendComment(T parent, @NoLength char[] buf,
5499 int start, int length) throws SAXException;
/**
 * Implemented by subclasses; its callers are outside the visible part of
 * this file.
 */
5501 protected abstract void appendCommentToDocument(@NoLength char[] buf,
5502 int start, int length) throws SAXException;
/**
 * Implemented by subclasses. <code>addAttributesToBody</code> and
 * <code>addAttributesToHtml</code> call it with an element that is
 * already on the stack.
 */
5504 protected abstract void addAttributesToElement(T element,
5505 HtmlAttributes attributes) throws SAXException;
5507 protected void markMalformedIfScript(T elt) throws SAXException {
5509 }
5511 protected void start(boolean fragmentMode) throws SAXException {
5513 }
5515 protected void end() throws SAXException {
5517 }
5519 protected void appendDoctypeToDocument(@Local String name,
5520 String publicIdentifier, String systemIdentifier)
5521 throws SAXException {
5523 }
5525 protected void elementPushed(@NsUri String ns, @Local String name, T node)
5526 throws SAXException {
5528 }
5530 protected void elementPopped(@NsUri String ns, @Local String name, T node)
5531 throws SAXException {
5533 }
5535 // [NOCPP[
5537 protected void documentMode(DocumentMode m, String publicIdentifier,
5538 String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
5539 throws SAXException {
5541 }
5543 /**
5544 * @see nu.validator.htmlparser.common.TokenHandler#wantsComments()
5545 */
5546 public boolean wantsComments() {
5547 return wantingComments;
5548 }
5550 public void setIgnoringComments(boolean ignoreComments) {
5551 wantingComments = !ignoreComments;
5552 }
5554 /**
5555 * Sets the errorHandler.
5556 *
5557 * @param errorHandler
5558 * the errorHandler to set
5559 */
5560 public final void setErrorHandler(ErrorHandler errorHandler) {
5561 this.errorHandler = errorHandler;
5562 }
5564 /**
5565 * Returns the errorHandler.
5566 *
5567 * @return the errorHandler
5568 */
5569 public ErrorHandler getErrorHandler() {
5570 return errorHandler;
5571 }
5573 /**
5574 * The argument MUST be an interned string or <code>null</code>.
5575 *
5576 * @param context
5577 */
5578 public final void setFragmentContext(@Local String context) {
5579 this.contextName = context;
5580 this.contextNamespace = "http://www.w3.org/1999/xhtml";
5581 this.contextNode = null;
5582 this.fragment = (contextName != null);
5583 this.quirks = false;
5584 }
5586 // ]NOCPP]
5588 /**
5589 * @see nu.validator.htmlparser.common.TokenHandler#cdataSectionAllowed()
5590 */
5591 @Inline public boolean cdataSectionAllowed() throws SAXException {
5592 return isInForeign();
5593 }
5595 private boolean isInForeign() {
5596 return currentPtr >= 0
5597 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml";
5598 }
5600 private boolean isInForeignButNotHtmlOrMathTextIntegrationPoint() {
5601 if (currentPtr < 0) {
5602 return false;
5603 }
5604 return !isSpecialParentInForeign(stack[currentPtr]);
5605 }
5607 /**
5608 * The argument MUST be an interned string or <code>null</code>.
5609 *
5610 * @param context
5611 */
5612 public final void setFragmentContext(@Local String context,
5613 @NsUri String ns, T node, boolean quirks) {
5614 this.contextName = context;
5615 this.contextNamespace = ns;
5616 this.contextNode = node;
5617 this.fragment = (contextName != null);
5618 this.quirks = quirks;
5619 }
5621 protected final T currentNode() {
5622 return stack[currentPtr].node;
5623 }
5625 /**
5626 * Returns the scriptingEnabled.
5627 *
5628 * @return the scriptingEnabled
5629 */
5630 public boolean isScriptingEnabled() {
5631 return scriptingEnabled;
5632 }
5634 /**
5635 * Sets the scriptingEnabled.
5636 *
5637 * @param scriptingEnabled
5638 * the scriptingEnabled to set
5639 */
5640 public void setScriptingEnabled(boolean scriptingEnabled) {
5641 this.scriptingEnabled = scriptingEnabled;
5642 }
5644 public void setIsSrcdocDocument(boolean isSrcdocDocument) {
5645 this.isSrcdocDocument = isSrcdocDocument;
5646 }
5648 // [NOCPP[
5650 /**
5651 * Sets the doctypeExpectation.
5652 *
5653 * @param doctypeExpectation
5654 * the doctypeExpectation to set
5655 */
5656 public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
5657 this.doctypeExpectation = doctypeExpectation;
5658 }
5660 public void setNamePolicy(XmlViolationPolicy namePolicy) {
5661 this.namePolicy = namePolicy;
5662 }
5664 /**
5665 * Sets the documentModeHandler.
5666 *
5667 * @param documentModeHandler
5668 * the documentModeHandler to set
5669 */
5670 public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
5671 this.documentModeHandler = documentModeHandler;
5672 }
5674 /**
5675 * Sets the reportingDoctype.
5676 *
5677 * @param reportingDoctype
5678 * the reportingDoctype to set
5679 */
5680 public void setReportingDoctype(boolean reportingDoctype) {
5681 this.reportingDoctype = reportingDoctype;
5682 }
5684 // ]NOCPP]
5686 /**
5687 * Flushes the pending characters. Public for document.write use cases only.
5688 * @throws SAXException
5689 */
5690 public final void flushCharacters() throws SAXException {
5691 if (charBufferLen > 0) {
5692 if ((mode == IN_TABLE || mode == IN_TABLE_BODY || mode == IN_ROW)
5693 && charBufferContainsNonWhitespace()) {
5694 errNonSpaceInTable();
5695 reconstructTheActiveFormattingElements();
5696 if (!stack[currentPtr].isFosterParenting()) {
5697 // reconstructing gave us a new current node
5698 appendCharacters(currentNode(), charBuffer, 0,
5699 charBufferLen);
5700 charBufferLen = 0;
5701 return;
5702 }
5704 int tablePos = findLastOrRoot(TreeBuilder.TABLE);
5705 int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE);
5707 if (templatePos >= tablePos) {
5708 appendCharacters(stack[templatePos].node, charBuffer, 0, charBufferLen);
5709 charBufferLen = 0;
5710 return;
5711 }
5713 StackNode<T> tableElt = stack[tablePos];
5714 insertFosterParentedCharacters(charBuffer, 0, charBufferLen,
5715 tableElt.node, stack[tablePos - 1].node);
5716 charBufferLen = 0;
5717 return;
5718 }
5719 appendCharacters(currentNode(), charBuffer, 0, charBufferLen);
5720 charBufferLen = 0;
5721 }
5722 }
5724 private boolean charBufferContainsNonWhitespace() {
5725 for (int i = 0; i < charBufferLen; i++) {
5726 switch (charBuffer[i]) {
5727 case ' ':
5728 case '\t':
5729 case '\n':
5730 case '\r':
5731 case '\u000C':
5732 continue;
5733 default:
5734 return true;
5735 }
5736 }
5737 return false;
5738 }
5740 /**
5741 * Creates a comparable snapshot of the tree builder state. Snapshot
5742 * creation is only supported immediately after a script end tag has been
5743 * processed. In C++ the caller is responsible for calling
5744 * <code>delete</code> on the returned object.
5745 *
5746 * @return a snapshot.
5747 * @throws SAXException
5748 */
5749 @SuppressWarnings("unchecked") public TreeBuilderState<T> newSnapshot()
5750 throws SAXException {
5751 StackNode<T>[] listCopy = new StackNode[listPtr + 1];
5752 for (int i = 0; i < listCopy.length; i++) {
5753 StackNode<T> node = listOfActiveFormattingElements[i];
5754 if (node != null) {
5755 StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
5756 node.name, node.node, node.popName,
5757 node.attributes.cloneAttributes(null)
5758 // [NOCPP[
5759 , node.getLocator()
5760 // ]NOCPP]
5761 );
5762 listCopy[i] = newNode;
5763 } else {
5764 listCopy[i] = null;
5765 }
5766 }
5767 StackNode<T>[] stackCopy = new StackNode[currentPtr + 1];
5768 for (int i = 0; i < stackCopy.length; i++) {
5769 StackNode<T> node = stack[i];
5770 int listIndex = findInListOfActiveFormattingElements(node);
5771 if (listIndex == -1) {
5772 StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
5773 node.name, node.node, node.popName,
5774 null
5775 // [NOCPP[
5776 , node.getLocator()
5777 // ]NOCPP]
5778 );
5779 stackCopy[i] = newNode;
5780 } else {
5781 stackCopy[i] = listCopy[listIndex];
5782 stackCopy[i].retain();
5783 }
5784 }
5785 int[] templateModeStackCopy = new int[templateModePtr + 1];
5786 System.arraycopy(templateModeStack, 0, templateModeStackCopy, 0,
5787 templateModeStackCopy.length);
5788 return new StateSnapshot<T>(stackCopy, listCopy, templateModeStackCopy, formPointer,
5789 headPointer, deepTreeSurrogateParent, mode, originalMode, framesetOk,
5790 needToDropLF, quirks);
5791 }
5793 public boolean snapshotMatches(TreeBuilderState<T> snapshot) {
5794 StackNode<T>[] stackCopy = snapshot.getStack();
5795 int stackLen = snapshot.getStackLength();
5796 StackNode<T>[] listCopy = snapshot.getListOfActiveFormattingElements();
5797 int listLen = snapshot.getListOfActiveFormattingElementsLength();
5798 int[] templateModeStackCopy = snapshot.getTemplateModeStack();
5799 int templateModeStackLen = snapshot.getTemplateModeStackLength();
5801 if (stackLen != currentPtr + 1
5802 || listLen != listPtr + 1
5803 || templateModeStackLen != templateModePtr + 1
5804 || formPointer != snapshot.getFormPointer()
5805 || headPointer != snapshot.getHeadPointer()
5806 || deepTreeSurrogateParent != snapshot.getDeepTreeSurrogateParent()
5807 || mode != snapshot.getMode()
5808 || originalMode != snapshot.getOriginalMode()
5809 || framesetOk != snapshot.isFramesetOk()
5810 || needToDropLF != snapshot.isNeedToDropLF()
5811 || quirks != snapshot.isQuirks()) { // maybe just assert quirks
5812 return false;
5813 }
5814 for (int i = listLen - 1; i >= 0; i--) {
5815 if (listCopy[i] == null
5816 && listOfActiveFormattingElements[i] == null) {
5817 continue;
5818 } else if (listCopy[i] == null
5819 || listOfActiveFormattingElements[i] == null) {
5820 return false;
5821 }
5822 if (listCopy[i].node != listOfActiveFormattingElements[i].node) {
5823 return false; // it's possible that this condition is overly
5824 // strict
5825 }
5826 }
5827 for (int i = stackLen - 1; i >= 0; i--) {
5828 if (stackCopy[i].node != stack[i].node) {
5829 return false;
5830 }
5831 }
5832 for (int i = templateModeStackLen - 1; i >=0; i--) {
5833 if (templateModeStackCopy[i] != templateModeStack[i]) {
5834 return false;
5835 }
5836 }
5837 return true;
5838 }
5840 @SuppressWarnings("unchecked") public void loadState(
5841 TreeBuilderState<T> snapshot, Interner interner)
5842 throws SAXException {
5843 StackNode<T>[] stackCopy = snapshot.getStack();
5844 int stackLen = snapshot.getStackLength();
5845 StackNode<T>[] listCopy = snapshot.getListOfActiveFormattingElements();
5846 int listLen = snapshot.getListOfActiveFormattingElementsLength();
5847 int[] templateModeStackCopy = snapshot.getTemplateModeStack();
5848 int templateModeStackLen = snapshot.getTemplateModeStackLength();
5850 for (int i = 0; i <= listPtr; i++) {
5851 if (listOfActiveFormattingElements[i] != null) {
5852 listOfActiveFormattingElements[i].release();
5853 }
5854 }
5855 if (listOfActiveFormattingElements.length < listLen) {
5856 listOfActiveFormattingElements = new StackNode[listLen];
5857 }
5858 listPtr = listLen - 1;
5860 for (int i = 0; i <= currentPtr; i++) {
5861 stack[i].release();
5862 }
5863 if (stack.length < stackLen) {
5864 stack = new StackNode[stackLen];
5865 }
5866 currentPtr = stackLen - 1;
5868 if (templateModeStack.length < templateModeStackLen) {
5869 templateModeStack = new int[templateModeStackLen];
5870 }
5871 templateModePtr = templateModeStackLen - 1;
5873 for (int i = 0; i < listLen; i++) {
5874 StackNode<T> node = listCopy[i];
5875 if (node != null) {
5876 StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
5877 Portability.newLocalFromLocal(node.name, interner), node.node,
5878 Portability.newLocalFromLocal(node.popName, interner),
5879 node.attributes.cloneAttributes(null)
5880 // [NOCPP[
5881 , node.getLocator()
5882 // ]NOCPP]
5883 );
5884 listOfActiveFormattingElements[i] = newNode;
5885 } else {
5886 listOfActiveFormattingElements[i] = null;
5887 }
5888 }
5889 for (int i = 0; i < stackLen; i++) {
5890 StackNode<T> node = stackCopy[i];
5891 int listIndex = findInArray(node, listCopy);
5892 if (listIndex == -1) {
5893 StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns,
5894 Portability.newLocalFromLocal(node.name, interner), node.node,
5895 Portability.newLocalFromLocal(node.popName, interner),
5896 null
5897 // [NOCPP[
5898 , node.getLocator()
5899 // ]NOCPP]
5900 );
5901 stack[i] = newNode;
5902 } else {
5903 stack[i] = listOfActiveFormattingElements[listIndex];
5904 stack[i].retain();
5905 }
5906 }
5907 System.arraycopy(templateModeStackCopy, 0, templateModeStack, 0, templateModeStackLen);
5908 formPointer = snapshot.getFormPointer();
5909 headPointer = snapshot.getHeadPointer();
5910 deepTreeSurrogateParent = snapshot.getDeepTreeSurrogateParent();
5911 mode = snapshot.getMode();
5912 originalMode = snapshot.getOriginalMode();
5913 framesetOk = snapshot.isFramesetOk();
5914 needToDropLF = snapshot.isNeedToDropLF();
5915 quirks = snapshot.isQuirks();
5916 }
5918 private int findInArray(StackNode<T> node, StackNode<T>[] arr) {
5919 for (int i = listPtr; i >= 0; i--) {
5920 if (node == arr[i]) {
5921 return i;
5922 }
5923 }
5924 return -1;
5925 }
5927 /**
5928 * @see nu.validator.htmlparser.impl.TreeBuilderState#getFormPointer()
5929 */
5930 public T getFormPointer() {
5931 return formPointer;
5932 }
5934 /**
5935 * Returns the headPointer.
5936 *
5937 * @return the headPointer
5938 */
5939 public T getHeadPointer() {
5940 return headPointer;
5941 }
5943 /**
5944 * Returns the deepTreeSurrogateParent.
5945 *
5946 * @return the deepTreeSurrogateParent
5947 */
5948 public T getDeepTreeSurrogateParent() {
5949 return deepTreeSurrogateParent;
5950 }
5952 /**
5953 * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElements()
5954 */
5955 public StackNode<T>[] getListOfActiveFormattingElements() {
5956 return listOfActiveFormattingElements;
5957 }
5959 /**
5960 * @see nu.validator.htmlparser.impl.TreeBuilderState#getStack()
5961 */
5962 public StackNode<T>[] getStack() {
5963 return stack;
5964 }
5966 /**
5967 * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStack()
5968 */
5969 public int[] getTemplateModeStack() {
5970 return templateModeStack;
5971 }
5973 /**
5974 * Returns the mode.
5975 *
5976 * @return the mode
5977 */
5978 public int getMode() {
5979 return mode;
5980 }
5982 /**
5983 * Returns the originalMode.
5984 *
5985 * @return the originalMode
5986 */
5987 public int getOriginalMode() {
5988 return originalMode;
5989 }
5991 /**
5992 * Returns the framesetOk.
5993 *
5994 * @return the framesetOk
5995 */
5996 public boolean isFramesetOk() {
5997 return framesetOk;
5998 }
6000 /**
6001 * Returns the needToDropLF.
6002 *
6003 * @return the needToDropLF
6004 */
6005 public boolean isNeedToDropLF() {
6006 return needToDropLF;
6007 }
6009 /**
6010 * Returns the quirks.
6011 *
6012 * @return the quirks
6013 */
6014 public boolean isQuirks() {
6015 return quirks;
6016 }
6018 /**
6019 * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElementsLength()
6020 */
6021 public int getListOfActiveFormattingElementsLength() {
6022 return listPtr + 1;
6023 }
6025 /**
6026 * @see nu.validator.htmlparser.impl.TreeBuilderState#getStackLength()
6027 */
6028 public int getStackLength() {
6029 return currentPtr + 1;
6030 }
6032 /**
6033 * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStackLength()
6034 */
6035 public int getTemplateModeStackLength() {
6036 return templateModePtr + 1;
6037 }
6039 /**
6040 * Reports a stray start tag.
6041 * @param name the name of the stray tag
6042 *
6043 * @throws SAXException
6044 */
6045 private void errStrayStartTag(@Local String name) throws SAXException {
6046 err("Stray start tag \u201C" + name + "\u201D.");
6047 }
6049 /**
6050 * Reports a stray end tag.
6051 * @param name the name of the stray tag
6052 *
6053 * @throws SAXException
6054 */
6055 private void errStrayEndTag(@Local String name) throws SAXException {
6056 err("Stray end tag \u201C" + name + "\u201D.");
6057 }
6059 /**
6060 * Reports a state when elements expected to be closed were not.
6061 *
6062 * @param eltPos the position of the start tag on the stack of the element
6063 * being closed.
6064 * @param name the name of the end tag
6065 *
6066 * @throws SAXException
6067 */
6068 private void errUnclosedElements(int eltPos, @Local String name) throws SAXException {
6069 errNoCheck("End tag \u201C" + name + "\u201D seen, but there were open elements.");
6070 errListUnclosedStartTags(eltPos);
6071 }
6073 /**
6074 * Reports a state when elements expected to be closed ahead of an implied
6075 * end tag but were not.
6076 *
6077 * @param eltPos the position of the start tag on the stack of the element
6078 * being closed.
6079 * @param name the name of the end tag
6080 *
6081 * @throws SAXException
6082 */
6083 private void errUnclosedElementsImplied(int eltPos, String name) throws SAXException {
6084 errNoCheck("End tag \u201C" + name + "\u201D implied, but there were open elements.");
6085 errListUnclosedStartTags(eltPos);
6086 }
6088 /**
6089 * Reports a state when elements expected to be closed ahead of an implied
6090 * table cell close.
6091 *
6092 * @param eltPos the position of the start tag on the stack of the element
6093 * being closed.
6094 * @throws SAXException
6095 */
6096 private void errUnclosedElementsCell(int eltPos) throws SAXException {
6097 errNoCheck("A table cell was implicitly closed, but there were open elements.");
6098 errListUnclosedStartTags(eltPos);
6099 }
6101 private void errStrayDoctype() throws SAXException {
6102 err("Stray doctype.");
6103 }
6105 private void errAlmostStandardsDoctype() throws SAXException {
6106 if (!isSrcdocDocument) {
6107 err("Almost standards mode doctype. Expected \u201C<!DOCTYPE html>\u201D.");
6108 }
6109 }
6111 private void errQuirkyDoctype() throws SAXException {
6112 if (!isSrcdocDocument) {
6113 err("Quirky doctype. Expected \u201C<!DOCTYPE html>\u201D.");
6114 }
6115 }
6117 private void errNonSpaceInTrailer() throws SAXException {
6118 err("Non-space character in page trailer.");
6119 }
6121 private void errNonSpaceAfterFrameset() throws SAXException {
6122 err("Non-space after \u201Cframeset\u201D.");
6123 }
6125 private void errNonSpaceInFrameset() throws SAXException {
6126 err("Non-space in \u201Cframeset\u201D.");
6127 }
6129 private void errNonSpaceAfterBody() throws SAXException {
6130 err("Non-space character after body.");
6131 }
6133 private void errNonSpaceInColgroupInFragment() throws SAXException {
6134 err("Non-space in \u201Ccolgroup\u201D when parsing fragment.");
6135 }
6137 private void errNonSpaceInNoscriptInHead() throws SAXException {
6138 err("Non-space character inside \u201Cnoscript\u201D inside \u201Chead\u201D.");
6139 }
6141 private void errFooBetweenHeadAndBody(@Local String name) throws SAXException {
6142 if (errorHandler == null) {
6143 return;
6144 }
6145 errNoCheck("\u201C" + name + "\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
6146 }
6148 private void errStartTagWithoutDoctype() throws SAXException {
6149 if (!isSrcdocDocument) {
6150 err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
6151 }
6152 }
6154 private void errNoSelectInTableScope() throws SAXException {
6155 err("No \u201Cselect\u201D in table scope.");
6156 }
6158 private void errStartSelectWhereEndSelectExpected() throws SAXException {
6159 err("\u201Cselect\u201D start tag where end tag expected.");
6160 }
6162 private void errStartTagWithSelectOpen(@Local String name)
6163 throws SAXException {
6164 if (errorHandler == null) {
6165 return;
6166 }
6167 errNoCheck("\u201C" + name
6168 + "\u201D start tag with \u201Cselect\u201D open.");
6169 }
6171 private void errBadStartTagInHead(@Local String name) throws SAXException {
6172 if (errorHandler == null) {
6173 return;
6174 }
6175 errNoCheck("Bad start tag in \u201C" + name
6176 + "\u201D in \u201Chead\u201D.");
6177 }
6179 private void errImage() throws SAXException {
6180 err("Saw a start tag \u201Cimage\u201D.");
6181 }
6183 private void errIsindex() throws SAXException {
6184 err("\u201Cisindex\u201D seen.");
6185 }
6187 private void errFooSeenWhenFooOpen(@Local String name) throws SAXException {
6188 if (errorHandler == null) {
6189 return;
6190 }
6191 errNoCheck("An \u201C" + name + "\u201D start tag seen but an element of the same type was already open.");
6192 }
6194 private void errHeadingWhenHeadingOpen() throws SAXException {
6195 err("Heading cannot be a child of another heading.");
6196 }
6198 private void errFramesetStart() throws SAXException {
6199 err("\u201Cframeset\u201D start tag seen.");
6200 }
6202 private void errNoCellToClose() throws SAXException {
6203 err("No cell to close.");
6204 }
6206 private void errStartTagInTable(@Local String name) throws SAXException {
6207 if (errorHandler == null) {
6208 return;
6209 }
6210 errNoCheck("Start tag \u201C" + name
6211 + "\u201D seen in \u201Ctable\u201D.");
6212 }
6214 private void errFormWhenFormOpen() throws SAXException {
6215 err("Saw a \u201Cform\u201D start tag, but there was already an active \u201Cform\u201D element. Nested forms are not allowed. Ignoring the tag.");
6216 }
6218 private void errTableSeenWhileTableOpen() throws SAXException {
6219 err("Start tag for \u201Ctable\u201D seen but the previous \u201Ctable\u201D is still open.");
6220 }
6222 private void errStartTagInTableBody(@Local String name) throws SAXException {
6223 if (errorHandler == null) {
6224 return;
6225 }
6226 errNoCheck("\u201C" + name + "\u201D start tag in table body.");
6227 }
6229 private void errEndTagSeenWithoutDoctype() throws SAXException {
6230 if (!isSrcdocDocument) {
6231 err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D.");
6232 }
6233 }
6235 private void errEndTagAfterBody() throws SAXException {
6236 err("Saw an end tag after \u201Cbody\u201D had been closed.");
6237 }
6239 private void errEndTagSeenWithSelectOpen(@Local String name) throws SAXException {
6240 if (errorHandler == null) {
6241 return;
6242 }
6243 errNoCheck("\u201C" + name
6244 + "\u201D end tag with \u201Cselect\u201D open.");
6245 }
6247 private void errGarbageInColgroup() throws SAXException {
6248 err("Garbage in \u201Ccolgroup\u201D fragment.");
6249 }
6251 private void errEndTagBr() throws SAXException {
6252 err("End tag \u201Cbr\u201D.");
6253 }
6255 private void errNoElementToCloseButEndTagSeen(@Local String name)
6256 throws SAXException {
6257 if (errorHandler == null) {
6258 return;
6259 }
6260 errNoCheck("No \u201C" + name + "\u201D element in scope but a \u201C"
6261 + name + "\u201D end tag seen.");
6262 }
6264 private void errHtmlStartTagInForeignContext(@Local String name)
6265 throws SAXException {
6266 if (errorHandler == null) {
6267 return;
6268 }
6269 errNoCheck("HTML start tag \u201C" + name
6270 + "\u201D in a foreign namespace context.");
6271 }
6273 private void errTableClosedWhileCaptionOpen() throws SAXException {
6274 err("\u201Ctable\u201D closed but \u201Ccaption\u201D was still open.");
6275 }
6277 private void errNoTableRowToClose() throws SAXException {
6278 err("No table row to close.");
6279 }
6281 private void errNonSpaceInTable() throws SAXException {
6282 err("Misplaced non-space characters insided a table.");
6283 }
6285 private void errUnclosedChildrenInRuby() throws SAXException {
6286 if (errorHandler == null) {
6287 return;
6288 }
6289 errNoCheck("Unclosed children in \u201Cruby\u201D.");
6290 }
6292 private void errStartTagSeenWithoutRuby(@Local String name) throws SAXException {
6293 if (errorHandler == null) {
6294 return;
6295 }
6296 errNoCheck("Start tag \u201C"
6297 + name
6298 + "\u201D seen without a \u201Cruby\u201D element being open.");
6299 }
6301 private void errSelfClosing() throws SAXException {
6302 if (errorHandler == null) {
6303 return;
6304 }
6305 errNoCheck("Self-closing syntax (\u201C/>\u201D) used on a non-void HTML element. Ignoring the slash and treating as a start tag.");
6306 }
6308 private void errNoCheckUnclosedElementsOnStack() throws SAXException {
6309 errNoCheck("Unclosed elements on stack.");
6310 }
6312 private void errEndTagDidNotMatchCurrentOpenElement(@Local String name,
6313 @Local String currOpenName) throws SAXException {
6314 if (errorHandler == null) {
6315 return;
6316 }
6317 errNoCheck("End tag \u201C"
6318 + name
6319 + "\u201D did not match the name of the current open element (\u201C"
6320 + currOpenName + "\u201D).");
6321 }
6323 private void errEndTagViolatesNestingRules(@Local String name) throws SAXException {
6324 if (errorHandler == null) {
6325 return;
6326 }
6327 errNoCheck("End tag \u201C" + name + "\u201D violates nesting rules.");
6328 }
6330 private void errEofWithUnclosedElements() throws SAXException {
6331 if (errorHandler == null) {
6332 return;
6333 }
6334 errNoCheck("End of file seen and there were open elements.");
6335 // just report all remaining unclosed elements
6336 errListUnclosedStartTags(0);
6337 }
6339 /**
6340 * Reports arriving at/near end of document with unclosed elements remaining.
6341 *
6342 * @param message
6343 * the message
6344 * @throws SAXException
6345 */
6346 private void errEndWithUnclosedElements(@Local String name) throws SAXException {
6347 if (errorHandler == null) {
6348 return;
6349 }
6350 errNoCheck("End tag for \u201C"
6351 + name
6352 + "\u201D seen, but there were unclosed elements.");
6353 // just report all remaining unclosed elements
6354 errListUnclosedStartTags(0);
6355 }
6356 }