parser/html/javasrc/Tokenizer.java

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/parser/html/javasrc/Tokenizer.java	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,7021 @@
     1.4 +/*
     1.5 + * Copyright (c) 2005-2007 Henri Sivonen
     1.6 + * Copyright (c) 2007-2013 Mozilla Foundation
     1.7 + * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla 
     1.8 + * Foundation, and Opera Software ASA.
     1.9 + *
    1.10 + * Permission is hereby granted, free of charge, to any person obtaining a 
    1.11 + * copy of this software and associated documentation files (the "Software"), 
    1.12 + * to deal in the Software without restriction, including without limitation 
    1.13 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
    1.14 + * and/or sell copies of the Software, and to permit persons to whom the 
    1.15 + * Software is furnished to do so, subject to the following conditions:
    1.16 + *
    1.17 + * The above copyright notice and this permission notice shall be included in 
    1.18 + * all copies or substantial portions of the Software.
    1.19 + *
    1.20 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
    1.21 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
    1.22 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
    1.23 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
    1.24 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
    1.25 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
    1.26 + * DEALINGS IN THE SOFTWARE.
    1.27 + */
    1.28 +
    1.29 +/*
    1.30 + * The comments following this one that use the same comment syntax as this 
    1.31 + * comment are quotes from the WHATWG HTML 5 spec as of 2 June 2007 
    1.32 + * amended as of June 18 2008 and May 31 2010.
    1.33 + * That document came with this statement:
    1.34 + * "© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and 
    1.35 + * Opera Software ASA. You are granted a license to use, reproduce and 
    1.36 + * create derivative works of this document."
    1.37 + */
    1.38 +
    1.39 +package nu.validator.htmlparser.impl;
    1.40 +
    1.41 +import nu.validator.htmlparser.annotation.Auto;
    1.42 +import nu.validator.htmlparser.annotation.CharacterName;
    1.43 +import nu.validator.htmlparser.annotation.Const;
    1.44 +import nu.validator.htmlparser.annotation.Inline;
    1.45 +import nu.validator.htmlparser.annotation.Local;
    1.46 +import nu.validator.htmlparser.annotation.NoLength;
    1.47 +import nu.validator.htmlparser.common.EncodingDeclarationHandler;
    1.48 +import nu.validator.htmlparser.common.Interner;
    1.49 +import nu.validator.htmlparser.common.TokenHandler;
    1.50 +import nu.validator.htmlparser.common.XmlViolationPolicy;
    1.51 +
    1.52 +import org.xml.sax.ErrorHandler;
    1.53 +import org.xml.sax.Locator;
    1.54 +import org.xml.sax.SAXException;
    1.55 +import org.xml.sax.SAXParseException;
    1.56 +
    1.57 +/**
    1.58 + * An implementation of
    1.59 + * http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
    1.60 + * 
    1.61 + * This class implements the <code>Locator</code> interface. This is not an
    1.62 + * incidental implementation detail: Users of this class are encouraged to make
    1.63 + * use of the <code>Locator</code> nature.
    1.64 + * 
    1.65 + * By default, the tokenizer may report data that XML 1.0 bans. The tokenizer
    1.66 + * can be configured to treat these conditions as fatal or to coerce the infoset
    1.67 + * to something that XML 1.0 allows.
    1.68 + * 
    1.69 + * @version $Id$
    1.70 + * @author hsivonen
    1.71 + */
    1.72 +public class Tokenizer implements Locator {
    1.73 +
    1.74 +    private static final int DATA_AND_RCDATA_MASK = ~1; // all bits set except bit 0, so DATA (0) and RCDATA (1) both mask to 0
    1.75 +
    1.76 +    public static final int DATA = 0;
    1.77 +
    1.78 +    public static final int RCDATA = 1;
    1.79 +
    1.80 +    public static final int SCRIPT_DATA = 2;
    1.81 +
    1.82 +    public static final int RAWTEXT = 3;
    1.83 +
    1.84 +    public static final int SCRIPT_DATA_ESCAPED = 4;
    1.85 +
    1.86 +    public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
    1.87 +
    1.88 +    public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
    1.89 +
    1.90 +    public static final int ATTRIBUTE_VALUE_UNQUOTED = 7;
    1.91 +
    1.92 +    public static final int PLAINTEXT = 8;
    1.93 +
    1.94 +    public static final int TAG_OPEN = 9;
    1.95 +
    1.96 +    public static final int CLOSE_TAG_OPEN = 10;
    1.97 +
    1.98 +    public static final int TAG_NAME = 11;
    1.99 +
   1.100 +    public static final int BEFORE_ATTRIBUTE_NAME = 12;
   1.101 +
   1.102 +    public static final int ATTRIBUTE_NAME = 13;
   1.103 +
   1.104 +    public static final int AFTER_ATTRIBUTE_NAME = 14;
   1.105 +
   1.106 +    public static final int BEFORE_ATTRIBUTE_VALUE = 15;
   1.107 +
   1.108 +    public static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
   1.109 +
   1.110 +    public static final int BOGUS_COMMENT = 17;
   1.111 +
   1.112 +    public static final int MARKUP_DECLARATION_OPEN = 18;
   1.113 +
   1.114 +    public static final int DOCTYPE = 19;
   1.115 +
   1.116 +    public static final int BEFORE_DOCTYPE_NAME = 20;
   1.117 +
   1.118 +    public static final int DOCTYPE_NAME = 21;
   1.119 +
   1.120 +    public static final int AFTER_DOCTYPE_NAME = 22;
   1.121 +
   1.122 +    public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
   1.123 +
   1.124 +    public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
   1.125 +
   1.126 +    public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
   1.127 +
   1.128 +    public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
   1.129 +
   1.130 +    public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
   1.131 +
   1.132 +    public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
   1.133 +
   1.134 +    public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
   1.135 +
   1.136 +    public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
   1.137 +
   1.138 +    public static final int BOGUS_DOCTYPE = 31;
   1.139 +
   1.140 +    public static final int COMMENT_START = 32;
   1.141 +
   1.142 +    public static final int COMMENT_START_DASH = 33;
   1.143 +
   1.144 +    public static final int COMMENT = 34;
   1.145 +
   1.146 +    public static final int COMMENT_END_DASH = 35;
   1.147 +
   1.148 +    public static final int COMMENT_END = 36;
   1.149 +
   1.150 +    public static final int COMMENT_END_BANG = 37;
   1.151 +
   1.152 +    public static final int NON_DATA_END_TAG_NAME = 38;
   1.153 +
   1.154 +    public static final int MARKUP_DECLARATION_HYPHEN = 39;
   1.155 +
   1.156 +    public static final int MARKUP_DECLARATION_OCTYPE = 40;
   1.157 +
   1.158 +    public static final int DOCTYPE_UBLIC = 41;
   1.159 +
   1.160 +    public static final int DOCTYPE_YSTEM = 42;
   1.161 +
   1.162 +    public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
   1.163 +
   1.164 +    public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
   1.165 +
   1.166 +    public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
   1.167 +
   1.168 +    public static final int CONSUME_CHARACTER_REFERENCE = 46;
   1.169 +
   1.170 +    public static final int CONSUME_NCR = 47;
   1.171 +
   1.172 +    public static final int CHARACTER_REFERENCE_TAIL = 48;
   1.173 +
   1.174 +    public static final int HEX_NCR_LOOP = 49;
   1.175 +
   1.176 +    public static final int DECIMAL_NRC_LOOP = 50; // NOTE(review): "NRC" looks like a transposition of "NCR" (cf. HEX_NCR_LOOP); public name, so left as is
   1.177 +
   1.178 +    public static final int HANDLE_NCR_VALUE = 51;
   1.179 +
   1.180 +    public static final int HANDLE_NCR_VALUE_RECONSUME = 52;
   1.181 +
   1.182 +    public static final int CHARACTER_REFERENCE_HILO_LOOKUP = 53;
   1.183 +
   1.184 +    public static final int SELF_CLOSING_START_TAG = 54;
   1.185 +
   1.186 +    public static final int CDATA_START = 55;
   1.187 +
   1.188 +    public static final int CDATA_SECTION = 56;
   1.189 +
   1.190 +    public static final int CDATA_RSQB = 57;
   1.191 +
   1.192 +    public static final int CDATA_RSQB_RSQB = 58;
   1.193 +
   1.194 +    public static final int SCRIPT_DATA_LESS_THAN_SIGN = 59;
   1.195 +
   1.196 +    public static final int SCRIPT_DATA_ESCAPE_START = 60;
   1.197 +
   1.198 +    public static final int SCRIPT_DATA_ESCAPE_START_DASH = 61;
   1.199 +
   1.200 +    public static final int SCRIPT_DATA_ESCAPED_DASH = 62;
   1.201 +
   1.202 +    public static final int SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
   1.203 +
   1.204 +    public static final int BOGUS_COMMENT_HYPHEN = 64;
   1.205 +
   1.206 +    public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
   1.207 +
   1.208 +    public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
   1.209 +
   1.210 +    public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
   1.211 +
   1.212 +    public static final int SCRIPT_DATA_DOUBLE_ESCAPED = 68;
   1.213 +
   1.214 +    public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
   1.215 +
   1.216 +    public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
   1.217 +
   1.218 +    public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
   1.219 +
   1.220 +    public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
   1.221 +
   1.222 +    public static final int PROCESSING_INSTRUCTION = 73;
   1.223 +
   1.224 +    public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
   1.225 +
   1.226 +    /**
   1.227 +     * Magic value for UTF-16 operations.
   1.228 +     */
   1.229 +    private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
   1.230 +
   1.231 +    /**
   1.232 +     * UTF-16 code unit array containing less than and greater than for emitting
   1.233 +     * those characters on certain parse errors.
   1.234 +     */
   1.235 +    private static final @NoLength char[] LT_GT = { '<', '>' };
   1.236 +
   1.237 +    /**
   1.238 +     * UTF-16 code unit array containing less than and solidus for emitting
   1.239 +     * those characters on certain parse errors.
   1.240 +     */
   1.241 +    private static final @NoLength char[] LT_SOLIDUS = { '<', '/' };
   1.242 +
   1.243 +    /**
   1.244 +     * UTF-16 code unit array containing ]] for emitting those characters on
   1.245 +     * state transitions.
   1.246 +     */
   1.247 +    private static final @NoLength char[] RSQB_RSQB = { ']', ']' };
   1.248 +
   1.249 +    /**
   1.250 +     * Array version of U+FFFD.
   1.251 +     */
   1.252 +    private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
   1.253 +
   1.254 +    // [NOCPP[
   1.255 +
   1.256 +    /**
   1.257 +     * Array version of space.
   1.258 +     */
   1.259 +    private static final @NoLength char[] SPACE = { ' ' };
   1.260 +
   1.261 +    // ]NOCPP]
   1.262 +
   1.263 +    /**
   1.264 +     * Array version of line feed.
   1.265 +     */
   1.266 +    private static final @NoLength char[] LF = { '\n' };
   1.267 +
   1.268 +    /**
   1.269 +     * Buffer growth parameter.
   1.270 +     */
   1.271 +    private static final int BUFFER_GROW_BY = 1024;
   1.272 +
   1.273 +    /**
   1.274 +     * "CDATA[" as <code>char[]</code>
   1.275 +     */
   1.276 +    private static final @NoLength char[] CDATA_LSQB = { 'C', 'D', 'A', 'T',
   1.277 +            'A', '[' };
   1.278 +
   1.279 +    /**
   1.280 +     * "octype" as <code>char[]</code>
   1.281 +     */
   1.282 +    private static final @NoLength char[] OCTYPE = { 'o', 'c', 't', 'y', 'p',
   1.283 +            'e' };
   1.284 +
   1.285 +    /**
   1.286 +     * "ublic" as <code>char[]</code>
   1.287 +     */
   1.288 +    private static final @NoLength char[] UBLIC = { 'u', 'b', 'l', 'i', 'c' };
   1.289 +
   1.290 +    /**
   1.291 +     * "ystem" as <code>char[]</code>
   1.292 +     */
   1.293 +    private static final @NoLength char[] YSTEM = { 'y', 's', 't', 'e', 'm' };
   1.294 +
   1.295 +    private static final char[] TITLE_ARR = { 't', 'i', 't', 'l', 'e' };
   1.296 +
   1.297 +    private static final char[] SCRIPT_ARR = { 's', 'c', 'r', 'i', 'p', 't' };
   1.298 +
   1.299 +    private static final char[] STYLE_ARR = { 's', 't', 'y', 'l', 'e' };
   1.300 +
   1.301 +    private static final char[] PLAINTEXT_ARR = { 'p', 'l', 'a', 'i', 'n', 't',
   1.302 +            'e', 'x', 't' };
   1.303 +
   1.304 +    private static final char[] XMP_ARR = { 'x', 'm', 'p' };
   1.305 +
   1.306 +    private static final char[] TEXTAREA_ARR = { 't', 'e', 'x', 't', 'a', 'r',
   1.307 +            'e', 'a' };
   1.308 +
   1.309 +    private static final char[] IFRAME_ARR = { 'i', 'f', 'r', 'a', 'm', 'e' };
   1.310 +
   1.311 +    private static final char[] NOEMBED_ARR = { 'n', 'o', 'e', 'm', 'b', 'e',
   1.312 +            'd' };
   1.313 +
   1.314 +    private static final char[] NOSCRIPT_ARR = { 'n', 'o', 's', 'c', 'r', 'i',
   1.315 +            'p', 't' };
   1.316 +
   1.317 +    private static final char[] NOFRAMES_ARR = { 'n', 'o', 'f', 'r', 'a', 'm',
   1.318 +            'e', 's' };
   1.319 +
   1.320 +    /**
   1.321 +     * The token handler.
   1.322 +     */
   1.323 +    protected final TokenHandler tokenHandler;
   1.324 +
   1.325 +    protected EncodingDeclarationHandler encodingDeclarationHandler;
   1.326 +
   1.327 +    // [NOCPP[
   1.328 +
   1.329 +    /**
   1.330 +     * The error handler.
   1.331 +     */
   1.332 +    protected ErrorHandler errorHandler;
   1.333 +
   1.334 +    // ]NOCPP]
   1.335 +
   1.336 +    /**
   1.337 +     * Whether the previous char read was CR.
   1.338 +     */
   1.339 +    protected boolean lastCR;
   1.340 +
   1.341 +    protected int stateSave;
   1.342 +
   1.343 +    private int returnStateSave;
   1.344 +
   1.345 +    protected int index;
   1.346 +
   1.347 +    private boolean forceQuirks;
   1.348 +
   1.349 +    private char additional;
   1.350 +
   1.351 +    private int entCol;
   1.352 +
   1.353 +    private int firstCharKey;
   1.354 +
   1.355 +    private int lo;
   1.356 +
   1.357 +    private int hi;
   1.358 +
   1.359 +    private int candidate;
   1.360 +
   1.361 +    private int strBufMark;
   1.362 +
   1.363 +    private int prevValue;
   1.364 +
   1.365 +    protected int value;
   1.366 +
   1.367 +    private boolean seenDigits;
   1.368 +
   1.369 +    protected int cstart;
   1.370 +
   1.371 +    /**
   1.372 +     * The SAX public id for the resource being tokenized. (Only passed back
   1.373 +     * as part of locator data.)
   1.374 +     */
   1.375 +    private String publicId;
   1.376 +
   1.377 +    /**
   1.378 +     * The SAX system id for the resource being tokenized. (Only passed back
   1.379 +     * as part of locator data.)
   1.380 +     */
   1.381 +    private String systemId;
   1.382 +
   1.383 +    /**
   1.384 +     * Buffer for short identifiers.
   1.385 +     */
   1.386 +    private @Auto char[] strBuf;
   1.387 +
   1.388 +    /**
   1.389 +     * Number of significant <code>char</code>s in <code>strBuf</code>.
   1.390 +     */
   1.391 +    private int strBufLen;
   1.392 +
   1.393 +    /**
   1.394 +     * <code>-1</code> to indicate that <code>strBuf</code> is used or otherwise
   1.395 +     * an offset to the main buffer.
   1.396 +     */
   1.397 +    // private int strBufOffset = -1;
   1.398 +    /**
   1.399 +     * Buffer for long strings.
   1.400 +     */
   1.401 +    private @Auto char[] longStrBuf;
   1.402 +
   1.403 +    /**
   1.404 +     * Number of significant <code>char</code>s in <code>longStrBuf</code>.
   1.405 +     */
   1.406 +    private int longStrBufLen;
   1.407 +
   1.408 +    /**
   1.409 +     * <code>-1</code> to indicate that <code>longStrBuf</code> is used or
   1.410 +     * otherwise an offset to the main buffer.
   1.411 +     */
   1.412 +    // private int longStrBufOffset = -1;
   1.413 +
   1.414 +    /**
   1.415 +     * Buffer for expanding NCRs falling into the Basic Multilingual Plane.
   1.416 +     */
   1.417 +    private final @Auto char[] bmpChar;
   1.418 +
   1.419 +    /**
   1.420 +     * Buffer for expanding astral NCRs.
   1.421 +     */
   1.422 +    private final @Auto char[] astralChar;
   1.423 +
   1.424 +    /**
   1.425 +     * The element whose end tag closes the current CDATA or RCDATA element.
   1.426 +     */
   1.427 +    protected ElementName endTagExpectation = null;
   1.428 +
   1.429 +    private char[] endTagExpectationAsArray; // not @Auto!
   1.430 +
   1.431 +    /**
   1.432 +     * <code>true</code> if tokenizing an end tag
   1.433 +     */
   1.434 +    protected boolean endTag;
   1.435 +
   1.436 +    /**
   1.437 +     * The current tag token name.
   1.438 +     */
   1.439 +    private ElementName tagName = null;
   1.440 +
   1.441 +    /**
   1.442 +     * The current attribute name.
   1.443 +     */
   1.444 +    protected AttributeName attributeName = null;
   1.445 +
   1.446 +    // [NOCPP[
   1.447 +
   1.448 +    /**
   1.449 +     * Whether comment tokens are emitted.
   1.450 +     */
   1.451 +    private boolean wantsComments = false;
   1.452 +
   1.453 +    /**
   1.454 +     * <code>true</code> when HTML4-specific additional errors are requested.
   1.455 +     */
   1.456 +    protected boolean html4;
   1.457 +
   1.458 +    /**
   1.459 +     * Whether the stream is past the first 512 bytes.
   1.460 +     */
   1.461 +    private boolean metaBoundaryPassed;
   1.462 +
   1.463 +    // ]NOCPP]
   1.464 +
   1.465 +    /**
   1.466 +     * The name of the current doctype token.
   1.467 +     */
   1.468 +    private @Local String doctypeName;
   1.469 +
   1.470 +    /**
   1.471 +     * The public id of the current doctype token.
   1.472 +     */
   1.473 +    private String publicIdentifier;
   1.474 +
   1.475 +    /**
   1.476 +     * The system id of the current doctype token.
   1.477 +     */
   1.478 +    private String systemIdentifier;
   1.479 +
   1.480 +    /**
   1.481 +     * The attribute holder.
   1.482 +     */
   1.483 +    private HtmlAttributes attributes;
   1.484 +
   1.485 +    // [NOCPP[
   1.486 +
   1.487 +    /**
   1.488 +     * The policy for vertical tab and form feed.
   1.489 +     */
   1.490 +    private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.ALTER_INFOSET;
   1.491 +
   1.492 +    /**
   1.493 +     * The policy for comments.
   1.494 +     */
   1.495 +    private XmlViolationPolicy commentPolicy = XmlViolationPolicy.ALTER_INFOSET;
   1.496 +
   1.497 +    private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET;
   1.498 +
   1.499 +    private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET;
   1.500 +
   1.501 +    private boolean html4ModeCompatibleWithXhtml1Schemata;
   1.502 +
   1.503 +    private int mappingLangToXmlLang;
   1.504 +
   1.505 +    // ]NOCPP]
   1.506 +
   1.507 +    private final boolean newAttributesEachTime;
   1.508 +
   1.509 +    private boolean shouldSuspend;
   1.510 +
   1.511 +    protected boolean confident;
   1.512 +
   1.513 +    private int line;
   1.514 +
   1.515 +    private Interner interner;
   1.516 +
   1.517 +    // CPPONLY: private boolean viewingXmlSource;
   1.518 +
   1.519 +    // [NOCPP[
   1.520 +
   1.521 +    protected LocatorImpl ampersandLocation;
   1.522 +
   1.523 +    public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime) {
   1.524 +        this.tokenHandler = tokenHandler;
   1.525 +        this.encodingDeclarationHandler = null;
   1.526 +        this.newAttributesEachTime = newAttributesEachTime; // consulted by emptyAttributes()
   1.527 +        this.bmpChar = new char[1]; // one UTF-16 code unit for expanding a BMP NCR
   1.528 +        this.astralChar = new char[2]; // two UTF-16 code units for expanding an astral NCR
   1.529 +        this.tagName = null; // from here on: per-token fields start out null
   1.530 +        this.attributeName = null;
   1.531 +        this.doctypeName = null;
   1.532 +        this.publicIdentifier = null;
   1.533 +        this.systemIdentifier = null;
   1.534 +        this.attributes = null;
   1.535 +    }
   1.536 +
   1.537 +    // ]NOCPP]
   1.538 +
   1.539 +    /**
   1.540 +     * Creates a tokenizer that delivers its tokens to the given handler.
   1.541 +     * 
   1.542 +     * @param tokenHandler
   1.543 +     *            the handler for receiving tokens
   1.544 +     */
   1.545 +    public Tokenizer(TokenHandler tokenHandler
   1.546 +    // CPPONLY: , boolean viewingXmlSource        
   1.547 +    ) {
   1.548 +        this.tokenHandler = tokenHandler;
   1.549 +        this.encodingDeclarationHandler = null;
   1.550 +        // [NOCPP[
   1.551 +        this.newAttributesEachTime = false;
   1.552 +        // ]NOCPP]
   1.553 +        this.bmpChar = new char[1];
   1.554 +        this.astralChar = new char[2];
   1.555 +        this.tagName = null;
   1.556 +        this.attributeName = null;
   1.557 +        this.doctypeName = null;
   1.558 +        this.publicIdentifier = null;
   1.559 +        this.systemIdentifier = null;
   1.560 +        // [NOCPP[
   1.561 +        this.attributes = null;
   1.562 +        // ]NOCPP]
   1.563 +        // CPPONLY: this.attributes = tokenHandler.HasBuilder() ? new HtmlAttributes(mappingLangToXmlLang) : null;
   1.564 +        // CPPONLY: this.newAttributesEachTime = !tokenHandler.HasBuilder();
   1.565 +        // CPPONLY: this.viewingXmlSource = viewingXmlSource;
   1.566 +    }
   1.567 +
   1.568 +    public void setInterner(Interner interner) {
   1.569 +        this.interner = interner; // later handed to the element-name and local-name lookups
   1.570 +    }
   1.571 +
   1.572 +    public void initLocation(String newPublicId, String newSystemId) {
   1.573 +        // Locator data only; returned later by getPublicId()/getSystemId().
   1.574 +        this.systemId = newSystemId;
   1.575 +        this.publicId = newPublicId;
   1.576 +    }
   1.577 +
   1.578 +    // CPPONLY: boolean isViewingXmlSource() {
   1.579 +    // CPPONLY: return viewingXmlSource;
   1.580 +    // CPPONLY: }
   1.581 +
   1.582 +    // [NOCPP[
   1.583 +
   1.584 +    /**
   1.585 +     * Tells whether the stored mapping mode equals
   1.586 +     * <code>AttributeName.HTML_LANG</code>.
   1.587 +     * @return <code>true</code> if the mode was set to HTML_LANG
   1.588 +     */
   1.589 +    public boolean isMappingLangToXmlLang() {
   1.590 +        return mappingLangToXmlLang == AttributeName.HTML_LANG;
   1.591 +    }
   1.592 +
   1.593 +    /**
   1.594 +     * Sets the mapping mode that is handed to new <code>HtmlAttributes</code>.
   1.595 +     * @param mappingLangToXmlLang
   1.596 +     *            <code>true</code> stores <code>AttributeName.HTML_LANG</code>,
   1.597 +     *            <code>false</code> stores <code>AttributeName.HTML</code>
   1.598 +     */
   1.599 +    public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
   1.600 +        this.mappingLangToXmlLang = mappingLangToXmlLang ? AttributeName.HTML_LANG
   1.601 +                : AttributeName.HTML;
   1.602 +    }
   1.603 +
   1.604 +    /**
   1.605 +     * Sets the error handler.
   1.606 +     * @param eh the error handler to store; returned by getErrorHandler()
   1.607 +     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
   1.608 +     */
   1.609 +    public void setErrorHandler(ErrorHandler eh) {
   1.610 +        this.errorHandler = eh;
   1.611 +    }
   1.612 +
   1.613 +    public ErrorHandler getErrorHandler() {
   1.614 +        return this.errorHandler; // null until a handler has been assigned
   1.615 +    }
   1.616 +
   1.617 +    /**
   1.618 +     * Sets the policy for comments (initially
   1.619 +     * <code>XmlViolationPolicy.ALTER_INFOSET</code>).
   1.620 +     * @param commentPolicy
   1.621 +     *            the commentPolicy to set
   1.622 +     */
   1.623 +    public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
   1.624 +        this.commentPolicy = commentPolicy;
   1.625 +    }
   1.626 +
   1.627 +    /**
   1.628 +     * Validates the contentNonXmlCharPolicy; this class stores nothing here.
   1.629 +     * @param contentNonXmlCharPolicy
   1.630 +     *            must be <code>XmlViolationPolicy.ALLOW</code>
   1.631 +     * @throws IllegalArgumentException for any other value
   1.632 +     */
   1.633 +    public void setContentNonXmlCharPolicy(
   1.634 +            XmlViolationPolicy contentNonXmlCharPolicy) {
   1.635 +        if (contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW) {
   1.636 +            throw new IllegalArgumentException(
   1.637 +                    "Must use ErrorReportingTokenizer to set contentNonXmlCharPolicy to non-ALLOW.");
   1.638 +        }
   1.639 +    }
   1.640 +
   1.641 +    /**
   1.642 +     * Sets the policy for vertical tab and form feed (initially
   1.643 +     * <code>XmlViolationPolicy.ALTER_INFOSET</code>).
   1.644 +     * @param contentSpacePolicy
   1.645 +     *            the contentSpacePolicy to set
   1.646 +     */
   1.647 +    public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
   1.648 +        this.contentSpacePolicy = contentSpacePolicy;
   1.649 +    }
   1.650 +
   1.651 +    /**
   1.652 +     * Sets the xmlnsPolicy.
   1.653 +     * @param xmlnsPolicy
   1.654 +     *            the xmlnsPolicy to set; must not be <code>FATAL</code>
   1.655 +     * @throws IllegalArgumentException if <code>xmlnsPolicy</code> is FATAL
   1.656 +     */
   1.657 +    public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
   1.658 +        if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
   1.659 +            throw new IllegalArgumentException("Can't use FATAL here.");
   1.660 +        }
   1.661 +        this.xmlnsPolicy = xmlnsPolicy;
   1.662 +    }
   1.663 +
   1.664 +    public void setNamePolicy(XmlViolationPolicy namePolicy) {
   1.665 +        this.namePolicy = namePolicy; // unlike setXmlnsPolicy(), FATAL is not rejected here
   1.666 +    }
   1.667 +
   1.668 +    /**
   1.669 +     * Sets the html4ModeCompatibleWithXhtml1Schemata flag (a plain field store).
   1.670 +     * 
   1.671 +     * @param html4ModeCompatibleWithXhtml1Schemata
   1.672 +     *            the html4ModeCompatibleWithXhtml1Schemata to set
   1.673 +     */
   1.674 +    public void setHtml4ModeCompatibleWithXhtml1Schemata(
   1.675 +            boolean html4ModeCompatibleWithXhtml1Schemata) {
   1.676 +        this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
   1.677 +    }
   1.678 +
   1.679 +    // ]NOCPP]
   1.680 +
   1.681 +    // For the token handler to call
   1.682 +    /**
   1.683 +     * Sets the tokenizer state and the associated element name. This should
   1.684 +     * only ever be used to put the tokenizer into one of the states that have
   1.685 +     * a special end tag expectation. For <code>Tokenizer.DATA</code> only the
   1.686 +     * state is stored and the end tag expectation is left untouched.
   1.687 +     * @param specialTokenizerState
   1.688 +     *            the tokenizer state to set
   1.689 +     * @param endTagExpectation
   1.690 +     *            the expected end tag for transitioning back to normal
   1.691 +     */
   1.692 +    public void setStateAndEndTagExpectation(int specialTokenizerState,
   1.693 +            @Local String endTagExpectation) {
   1.694 +        this.stateSave = specialTokenizerState;
   1.695 +        if (specialTokenizerState == Tokenizer.DATA) {
   1.696 +            return;
   1.697 +        }
   1.698 +        @Auto char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation);
   1.699 +        this.endTagExpectation = ElementName.elementNameByBuffer(asArray, 0,
   1.700 +                asArray.length, interner);
   1.701 +        endTagExpectationToArray(); // refreshes endTagExpectationAsArray to match
   1.702 +    }
   1.703 +
   1.704 +    /**
   1.705 +     * Sets the tokenizer state and the associated element name. This should
   1.706 +     * only ever be used to put the tokenizer into one of the states that have
   1.707 +     * a special end tag expectation. Unlike the <code>String</code> overload,
   1.708 +     * this one does not special-case <code>Tokenizer.DATA</code>.
   1.709 +     * @param specialTokenizerState
   1.710 +     *            the tokenizer state to set
   1.711 +     * @param endTagExpectation
   1.712 +     *            the expected end tag for transitioning back to normal
   1.713 +     */
   1.714 +    public void setStateAndEndTagExpectation(int specialTokenizerState,
   1.715 +            ElementName endTagExpectation) {
   1.716 +        this.stateSave = specialTokenizerState;
   1.717 +        this.endTagExpectation = endTagExpectation;
   1.718 +        endTagExpectationToArray(); // refreshes endTagExpectationAsArray to match
   1.719 +    }
   1.720 +
   1.721 +    private void endTagExpectationToArray() { // picks the char[] spelling of the expected end tag
   1.722 +        switch (endTagExpectation.getGroup()) { // one preallocated array per supported group
   1.723 +            case TreeBuilder.TITLE:
   1.724 +                endTagExpectationAsArray = TITLE_ARR;
   1.725 +                break;
   1.726 +            case TreeBuilder.SCRIPT:
   1.727 +                endTagExpectationAsArray = SCRIPT_ARR;
   1.728 +                break;
   1.729 +            case TreeBuilder.STYLE:
   1.730 +                endTagExpectationAsArray = STYLE_ARR;
   1.731 +                break;
   1.732 +            case TreeBuilder.PLAINTEXT:
   1.733 +                endTagExpectationAsArray = PLAINTEXT_ARR;
   1.734 +                break;
   1.735 +            case TreeBuilder.XMP:
   1.736 +                endTagExpectationAsArray = XMP_ARR;
   1.737 +                break;
   1.738 +            case TreeBuilder.TEXTAREA:
   1.739 +                endTagExpectationAsArray = TEXTAREA_ARR;
   1.740 +                break;
   1.741 +            case TreeBuilder.IFRAME:
   1.742 +                endTagExpectationAsArray = IFRAME_ARR;
   1.743 +                break;
   1.744 +            case TreeBuilder.NOEMBED:
   1.745 +                endTagExpectationAsArray = NOEMBED_ARR;
   1.746 +                break;
   1.747 +            case TreeBuilder.NOSCRIPT:
   1.748 +                endTagExpectationAsArray = NOSCRIPT_ARR;
   1.749 +                break;
   1.750 +            case TreeBuilder.NOFRAMES:
   1.751 +                endTagExpectationAsArray = NOFRAMES_ARR;
   1.752 +                break;
   1.753 +            default: // any other group: the cached array is left as it was
   1.754 +                assert false: "Bad end tag expectation.";
   1.755 +                break;
   1.756 +        }
   1.757 +    }
   1.758 +
   1.759 +    /**
   1.760 +     * For C++ use only: overwrites the line reported by getLineNumber().
   1.761 +     */
   1.762 +    public void setLineNumber(int line) {
   1.763 +        this.line = line;
   1.764 +    }
   1.765 +
   1.766 +    // start Locator impl
   1.767 +
   1.768 +    /**
   1.769 +     * Returns the stored line; see {@link org.xml.sax.Locator#getLineNumber()}.
   1.770 +     */
   1.771 +    @Inline public int getLineNumber() {
   1.772 +        return line;
   1.773 +    }
   1.774 +
   1.775 +    // [NOCPP[
   1.776 +
   1.777 +    /**
   1.778 +     * Always returns <code>-1</code>; see {@link org.xml.sax.Locator#getColumnNumber()}.
   1.779 +     */
   1.780 +    @Inline public int getColumnNumber() {
   1.781 +        return -1; // the SAX Locator value for "no column number available"
   1.782 +    }
   1.783 +
   1.784 +    /**
   1.785 +     * Returns the public id stored by initLocation(); see {@link org.xml.sax.Locator#getPublicId()}.
   1.786 +     */
   1.787 +    public String getPublicId() {
   1.788 +        return publicId;
   1.789 +    }
   1.790 +
   1.791 +    /**
   1.792 +     * Returns the system id stored by initLocation(); see {@link org.xml.sax.Locator#getSystemId()}.
   1.793 +     */
   1.794 +    public String getSystemId() {
   1.795 +        return systemId;
   1.796 +    }
   1.797 +
   1.798 +    // end Locator impl
   1.799 +
   1.800 +    // end public API
   1.801 +
   1.802 +    public void notifyAboutMetaBoundary() {
   1.803 +        metaBoundaryPassed = true; // per the field doc: the stream is past the first 512 bytes
   1.804 +    }
   1.805 +
   1.806 +    void turnOnAdditionalHtml4Errors() {
   1.807 +        html4 = true; // requests the HTML4-specific additional errors
   1.808 +    }
   1.809 +
   1.810 +    // ]NOCPP]
   1.811 +
   1.812 +    HtmlAttributes emptyAttributes() {
   1.813 +        // [NOCPP[
   1.814 +        if (newAttributesEachTime) {
   1.815 +            return new HtmlAttributes(mappingLangToXmlLang); // fresh holder on every call
   1.816 +        } else {
   1.817 +            // ]NOCPP]
   1.818 +            return HtmlAttributes.EMPTY_ATTRIBUTES;
   1.819 +            // [NOCPP[
   1.820 +        }
   1.821 +        // ]NOCPP]
   1.822 +    }
   1.823 +
   1.824 +    @Inline private void clearStrBufAndAppend(char c) {
   1.825 +        strBuf[0] = c; // restart at index 0; no capacity check is made here
   1.826 +        strBufLen = 1; // only this first code unit is significant now
   1.827 +    }
   1.828 +
   1.829 +    @Inline private void clearStrBuf() {
   1.830 +        strBufLen = 0; // the array itself is kept; only the significant length is reset
   1.831 +    }
   1.832 +
   1.833 +    /**
   1.834 +     * Adds one code unit to the end of the smaller buffer, growing the
   1.835 +     * backing array by <code>BUFFER_GROW_BY</code> when it is full.
   1.836 +     * @param c
   1.837 +     *            the UTF-16 code unit to append
   1.838 +     */
   1.839 +    private void appendStrBuf(char c) {
   1.840 +        if (strBuf.length == strBufLen) {
   1.841 +            char[] grown = new char[strBufLen + Tokenizer.BUFFER_GROW_BY];
   1.842 +            System.arraycopy(strBuf, 0, grown, 0, strBufLen);
   1.843 +            strBuf = grown;
   1.844 +        }
   1.845 +        strBuf[strBufLen++] = c;
   1.846 +    }
   1.847 +
   1.848 +    /**
   1.849 +     * Creates a string from the first <code>strBufLen</code> code units of the
   1.850 +     * smaller buffer by calling <code>Portability.newStringFromBuffer</code>.
   1.851 +     * <p>
   1.852 +     * C++ memory note: The return value must be released.
   1.853 +     *
   1.854 +     * @return the smaller buffer as a string
   1.855 +     */
   1.856 +    protected String strBufToString() {
   1.857 +        return Portability.newStringFromBuffer(this.strBuf, 0, this.strBufLen);
   1.858 +    }
   1.859 +
   1.860 +    /**
   1.861 +     * Turns the smaller buffer into a local name (through
   1.862 +     * <code>Portability.newLocalNameFromBuffer</code> and the interner) and
   1.863 +     * stores it in <code>doctypeName</code>. Nothing is returned; the stored
   1.864 +     * value is released in emitDoctypeToken().
   1.865 +     */
   1.866 +    private void strBufToDoctypeName() {
   1.867 +        this.doctypeName = Portability.newLocalNameFromBuffer(this.strBuf, 0,
   1.868 +                this.strBufLen, this.interner);
   1.869 +    }
   1.870 +
   1.871 +    /**
   1.872 +     * Emits the smaller buffer as character tokens; when the buffer is empty
   1.873 +     * the token handler is not called at all.
   1.874 +     * @throws SAXException if the token handler threw
   1.875 +     */
   1.876 +    private void emitStrBuf() throws SAXException {
   1.877 +        if (strBufLen <= 0) {
   1.878 +            return;
   1.879 +        }
   1.880 +        tokenHandler.characters(strBuf, 0, strBufLen);
   1.881 +    }
   1.882 +
   1.883 +    @Inline private void clearLongStrBuf() {
   1.884 +        this.longStrBufLen = 0; // only the length is reset; the array contents are left as they are
   1.885 +    }
   1.886 +
   1.887 +    @Inline private void clearLongStrBufAndAppend(char c) {
   1.888 +        this.longStrBuf[0] = c; // c becomes the first code unit of the larger buffer ...
   1.889 +        this.longStrBufLen = 1; // ... and the only one that counts as content
   1.890 +    }
   1.891 +
   1.892 +    /**
   1.893 +     * Appends to the larger buffer, growing a full array by half of its length
   1.894 +     * and never by less than one slot (half of a length of 0 or 1 is 0).
   1.895 +     * @param c the UTF-16 code unit to append
   1.896 +     */
   1.897 +    private void appendLongStrBuf(char c) {
   1.898 +        if (longStrBufLen == longStrBuf.length) {
   1.899 +            int grownLen = longStrBufLen + (longStrBufLen >> 1);
   1.900 +            char[] newBuf = new char[grownLen > longStrBufLen ? grownLen : longStrBufLen + 1];
   1.901 +            System.arraycopy(longStrBuf, 0, newBuf, 0, longStrBuf.length);
   1.902 +            longStrBuf = newBuf;
   1.903 +        }
   1.904 +        longStrBuf[longStrBufLen++] = c;
   1.905 +    }
   1.906 +
   1.907 +    @Inline private void appendSecondHyphenToBogusComment() throws SAXException { // appends '-' to the larger buffer, subject to commentPolicy
   1.908 +        // [NOCPP[
   1.909 +        switch (commentPolicy) {
   1.910 +            case ALTER_INFOSET:
   1.911 +                // Put a space ahead of the hyphen that the shared code below appends.
   1.912 +                appendLongStrBuf(' ');
   1.913 +                // FALLTHROUGH
   1.914 +            case ALLOW:
   1.915 +                warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
   1.916 +                // ]NOCPP]
   1.917 +                appendLongStrBuf('-'); // the only statement outside the NOCPP spans
   1.918 +                // [NOCPP[
   1.919 +                break;
   1.920 +            case FATAL:
   1.921 +                fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); // fatal() always throws
   1.922 +                break;
   1.923 +        }
   1.924 +        // ]NOCPP]
   1.925 +    }
   1.926 +
   1.927 +    // [NOCPP[
   1.928 +    private void maybeAppendSpaceToBogusComment() throws SAXException {
   1.929 +        switch (commentPolicy) {
   1.930 +            case ALTER_INFOSET:
   1.931 +                // detachLongStrBuf();
   1.932 +                appendLongStrBuf(' ');
   1.933 +                // FALLTHROUGH
   1.934 +            case ALLOW:
   1.935 +                warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
   1.936 +                break;
   1.937 +            case FATAL:
   1.938 +                fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
   1.939 +                break;
   1.940 +        }
   1.941 +    }
   1.942 +
   1.943 +    // ]NOCPP]
   1.944 +
   1.945 +    @Inline private void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char c)
   1.946 +            throws SAXException { // reports consecutive hyphens, then appends c subject to commentPolicy
   1.947 +        errConsecutiveHyphens();
   1.948 +        // [NOCPP[
   1.949 +        switch (commentPolicy) {
   1.950 +            case ALTER_INFOSET:
   1.951 +                // Drop the last buffered code unit and append " -" in its place.
   1.952 +                longStrBufLen--;
   1.953 +                appendLongStrBuf(' ');
   1.954 +                appendLongStrBuf('-');
   1.955 +                // FALLTHROUGH
   1.956 +            case ALLOW:
   1.957 +                warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
   1.958 +                // ]NOCPP]
   1.959 +                appendLongStrBuf(c); // the only statement of the switch outside the NOCPP spans
   1.960 +                // [NOCPP[
   1.961 +                break;
   1.962 +            case FATAL:
   1.963 +                fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); // fatal() always throws
   1.964 +                break;
   1.965 +        }
   1.966 +        // ]NOCPP]
   1.967 +    }
   1.968 +
   1.969 +    private void appendLongStrBuf(@NoLength char[] buffer, int offset, int length) {
   1.970 +        int reqLen = longStrBufLen + length;
   1.971 +        if (longStrBuf.length < reqLen) {
   1.972 +            char[] newBuf = new char[reqLen + (reqLen >> 1)];
   1.973 +            System.arraycopy(longStrBuf, 0, newBuf, 0, longStrBuf.length);
   1.974 +            longStrBuf = newBuf;
   1.975 +        }
   1.976 +        System.arraycopy(buffer, offset, longStrBuf, longStrBufLen, length);
   1.977 +        longStrBufLen = reqLen;
   1.978 +    }
   1.979 +
   1.980 +    /**
   1.981 +     * Copies the first <code>strBufLen</code> code units of the smaller buffer onto the end of the larger one.
   1.982 +     */
   1.983 +    @Inline private void appendStrBufToLongStrBuf() {
   1.984 +        appendLongStrBuf(this.strBuf, 0, this.strBufLen);
   1.985 +    }
   1.986 +
   1.987 +    /**
   1.988 +     * Creates a string from the first <code>longStrBufLen</code> code units of
   1.989 +     * the larger buffer by calling <code>Portability.newStringFromBuffer</code>.
   1.990 +     * <p>
   1.991 +     * C++ memory note: The return value must be released.
   1.992 +     *
   1.993 +     * @return the larger buffer as a string
   1.994 +     */
   1.995 +    private String longStrBufToString() {
   1.996 +        return Portability.newStringFromBuffer(this.longStrBuf, 0, this.longStrBufLen);
   1.997 +    }
   1.998 +
   1.999 +    /**
  1.1000 +     * Emits the current comment token from the larger buffer.
  1.1001 +     * @param provisionalHyphens number of code units at the end of the larger
  1.1002 +     *            buffer that are left out of the emitted comment
  1.1003 +     * @param pos index used to reposition <code>cstart</code> (set to pos + 1)
  1.1004 +     * @throws SAXException
  1.1005 +     *             presumably when <code>tokenHandler.comment</code> throws it
  1.1006 +     */
  1.1007 +    private void emitComment(int provisionalHyphens, int pos)
  1.1008 +            throws SAXException {
  1.1009 +        // [NOCPP[
  1.1010 +        if (wantsComments) {
  1.1011 +            // ]NOCPP]
  1.1012 +            // if (longStrBufOffset != -1) {
  1.1013 +            // tokenHandler.comment(buf, longStrBufOffset, longStrBufLen
  1.1014 +            // - provisionalHyphens);
  1.1015 +            // } else {
  1.1016 +            tokenHandler.comment(longStrBuf, 0, longStrBufLen
  1.1017 +                    - provisionalHyphens);
  1.1018 +            // }
  1.1019 +            // [NOCPP[
  1.1020 +        }
  1.1021 +        // ]NOCPP]
  1.1022 +        cstart = pos + 1; // done whether or not the comment was handed to the handler
  1.1023 +    }
  1.1024 +
  1.1025 +    /**
  1.1026 +     * Flushes coalesced character tokens.
  1.1027 +     * 
  1.1028 +     * @param buf
  1.1029 +     *            TODO
  1.1030 +     * @param pos
  1.1031 +     *            TODO
  1.1032 +     * 
  1.1033 +     * @throws SAXException
  1.1034 +     */
  1.1035 +    protected void flushChars(@NoLength char[] buf, int pos)
  1.1036 +            throws SAXException {
  1.1037 +        if (pos > cstart) {
  1.1038 +            tokenHandler.characters(buf, cstart, pos - cstart);
  1.1039 +        }
  1.1040 +        cstart = Integer.MAX_VALUE;
  1.1041 +    }
  1.1042 +
  1.1043 +    /**
  1.1044 +     * Reports a condition that would make the infoset incompatible with XML
  1.1045 +     * 1.0 as fatal: the error handler, if there is one, is notified, and the
  1.1046 +     * exception is then thrown to the caller in any case.
  1.1047 +     * 
  1.1048 +     * @param message the message
  1.1049 +     * @throws SAXException
  1.1050 +     *             always (normally the SAXParseException built here)
  1.1051 +     */
  1.1052 +    public void fatal(String message) throws SAXException {
  1.1053 +        final SAXParseException failure = new SAXParseException(message, this);
  1.1054 +        if (errorHandler != null) {
  1.1055 +            errorHandler.fatalError(failure);
  1.1056 +        }
  1.1057 +        throw failure;
  1.1058 +    }
  1.1059 +
  1.1060 +    /**
  1.1061 +     * Reports a Parse Error.
  1.1062 +     * 
  1.1063 +     * @param message
  1.1064 +     *            the message
  1.1065 +     * @throws SAXException
  1.1066 +     */
  1.1067 +    public void err(String message) throws SAXException {
  1.1068 +        if (errorHandler == null) {
  1.1069 +            return;
  1.1070 +        }
  1.1071 +        SAXParseException spe = new SAXParseException(message, this);
  1.1072 +        errorHandler.error(spe);
  1.1073 +    }
  1.1074 +
  1.1075 +    public void errTreeBuilder(String message) throws SAXException {
  1.1076 +        ErrorHandler eh = null;
  1.1077 +        if (tokenHandler instanceof TreeBuilder<?>) {
  1.1078 +            TreeBuilder<?> treeBuilder = (TreeBuilder<?>) tokenHandler;
  1.1079 +            eh = treeBuilder.getErrorHandler();
  1.1080 +        }
  1.1081 +        if (eh == null) {
  1.1082 +            eh = errorHandler;
  1.1083 +        }
  1.1084 +        if (eh == null) {
  1.1085 +            return;
  1.1086 +        }
  1.1087 +        SAXParseException spe = new SAXParseException(message, this);
  1.1088 +        eh.error(spe);
  1.1089 +    }
  1.1090 +
  1.1091 +    /**
  1.1092 +     * Reports a warning to the error handler. Without an error handler the
  1.1093 +     * call does nothing and no exception object is created.
  1.1094 +     * 
  1.1095 +     * @param message
  1.1096 +     *            the message
  1.1097 +     * @throws SAXException
  1.1098 +     *             if the error handler throws it
  1.1099 +     */
  1.1100 +    public void warn(String message) throws SAXException {
  1.1101 +        if (errorHandler != null) {
  1.1102 +            errorHandler.warning(new SAXParseException(message, this));
  1.1103 +        }
  1.1104 +    }
  1.1105 +
  1.1106 +    /**
  1.1107 +     * Resolves the smaller buffer to an element name (ElementName.elementNameByBuffer
  1.1108 +     * with the interner) and stores it in <code>tagName</code>; returns nothing.
  1.1109 +     */
  1.1110 +    private void strBufToElementNameString() {
  1.1111 +        this.tagName = ElementName.elementNameByBuffer(this.strBuf, 0,
  1.1112 +                this.strBufLen, this.interner);
  1.1113 +    }
  1.1114 +
  1.1115 +    private int emitCurrentTagToken(boolean selfClosing, int pos)
  1.1116 +            throws SAXException { // hands the finished start or end tag to the token handler; returns the state to continue in
  1.1117 +        cstart = pos + 1; // the character-run start index is moved just past pos
  1.1118 +        maybeErrSlashInEndTag(selfClosing);
  1.1119 +        stateSave = Tokenizer.DATA; // default next state; see the note ahead of the return
  1.1120 +        HtmlAttributes attrs = (attributes == null ? HtmlAttributes.EMPTY_ATTRIBUTES
  1.1121 +                : attributes); // the handler is never given a null attribute holder
  1.1122 +        if (endTag) {
  1.1123 +            /*
  1.1124 +             * When an end tag token is emitted, the content model flag must be
  1.1125 +             * switched to the PCDATA state.
  1.1126 +             */
  1.1127 +            maybeErrAttributesOnEndTag(attrs);
  1.1128 +            // CPPONLY: if (!viewingXmlSource) {
  1.1129 +            tokenHandler.endTag(tagName);
  1.1130 +            // CPPONLY: }
  1.1131 +            // CPPONLY: if (newAttributesEachTime) {
  1.1132 +            // CPPONLY:   Portability.delete(attributes);
  1.1133 +            // CPPONLY:   attributes = null;
  1.1134 +            // CPPONLY: }
  1.1135 +        } else {
  1.1136 +            // CPPONLY: if (viewingXmlSource) {
  1.1137 +            // CPPONLY:   assert newAttributesEachTime;
  1.1138 +            // CPPONLY:   Portability.delete(attributes);
  1.1139 +            // CPPONLY:   attributes = null;
  1.1140 +            // CPPONLY: } else {
  1.1141 +            tokenHandler.startTag(tagName, attrs, selfClosing);
  1.1142 +            // CPPONLY: }
  1.1143 +        }
  1.1144 +        tagName.release();
  1.1145 +        tagName = null;
  1.1146 +        if (newAttributesEachTime) {
  1.1147 +            attributes = null; // the next attribute makes attributeNameComplete() allocate a new holder
  1.1148 +        } else {
  1.1149 +            attributes.clear(mappingLangToXmlLang); // NOTE(review): NPE if attributes is null here; presumably preallocated when !newAttributesEachTime - confirm
  1.1150 +        }
  1.1151 +        /*
  1.1152 +         * The token handler may have called setStateAndEndTagExpectation
  1.1153 +         * and changed stateSave since the start of this method.
  1.1154 +         */
  1.1155 +        return stateSave;
  1.1156 +    }
  1.1157 +
  1.1158 +    private void attributeNameComplete() throws SAXException { // resolves the buffered attribute name; a duplicate is reported and dropped
  1.1159 +        // if (strBufOffset != -1) {
  1.1160 +        // attributeName = AttributeName.nameByBuffer(buf, strBufOffset,
  1.1161 +        // strBufLen, namePolicy != XmlViolationPolicy.ALLOW);
  1.1162 +        // } else {
  1.1163 +        attributeName = AttributeName.nameByBuffer(strBuf, 0, strBufLen
  1.1164 +        // [NOCPP[
  1.1165 +                , namePolicy != XmlViolationPolicy.ALLOW
  1.1166 +                // ]NOCPP]
  1.1167 +                , interner);
  1.1168 +        // }
  1.1169 +
  1.1170 +        if (attributes == null) {
  1.1171 +            attributes = new HtmlAttributes(mappingLangToXmlLang); // created on first use for the current tag
  1.1172 +        }
  1.1173 +
  1.1174 +        /*
  1.1175 +         * When the user agent leaves the attribute name state (and before
  1.1176 +         * emitting the tag token, if appropriate), the complete attribute's
  1.1177 +         * name must be compared to the other attributes on the same token; if
  1.1178 +         * there is already an attribute on the token with the exact same name,
  1.1179 +         * then this is a parse error and the new attribute must be dropped,
  1.1180 +         * along with the value that gets associated with it (if any).
  1.1181 +         */
  1.1182 +        if (attributes.contains(attributeName)) {
  1.1183 +            errDuplicateAttribute();
  1.1184 +            attributeName.release();
  1.1185 +            attributeName = null; // addAttributeWithoutValue()/addAttributeWithValue() skip a null name
  1.1186 +        }
  1.1187 +    }
  1.1188 +
  1.1189 +    private void addAttributeWithoutValue() throws SAXException { // adds the pending attribute name to |attributes| without a parsed value
  1.1190 +        noteAttributeWithoutValue();
  1.1191 +
  1.1192 +        // [NOCPP[
  1.1193 +        if (metaBoundaryPassed && AttributeName.CHARSET == attributeName
  1.1194 +                && ElementName.META == tagName) {
  1.1195 +            err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
  1.1196 +        }
  1.1197 +        // ]NOCPP]
  1.1198 +        if (attributeName != null) { // null e.g. after attributeNameComplete() dropped a duplicate
  1.1199 +            // [NOCPP[
  1.1200 +            if (html4) {
  1.1201 +                if (attributeName.isBoolean()) {
  1.1202 +                    if (html4ModeCompatibleWithXhtml1Schemata) {
  1.1203 +                        attributes.addAttribute(attributeName,
  1.1204 +                                attributeName.getLocal(AttributeName.HTML), // the value is the attribute's own local name
  1.1205 +                                xmlnsPolicy);
  1.1206 +                    } else {
  1.1207 +                        attributes.addAttribute(attributeName, "", xmlnsPolicy);
  1.1208 +                    }
  1.1209 +                } else {
  1.1210 +                    if (AttributeName.BORDER != attributeName) { // NOTE(review): a valueless border attribute is not added at all here - intended?
  1.1211 +                        err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
  1.1212 +                        attributes.addAttribute(attributeName, "", xmlnsPolicy);
  1.1213 +                    }
  1.1214 +                }
  1.1215 +            } else {
  1.1216 +                if (AttributeName.SRC == attributeName
  1.1217 +                        || AttributeName.HREF == attributeName) {
  1.1218 +                    warn("Attribute \u201C"
  1.1219 +                            + attributeName.getLocal(AttributeName.HTML)
  1.1220 +                            + "\u201D without an explicit value seen. The attribute may be dropped by IE7.");
  1.1221 +                }
  1.1222 +                // ]NOCPP]
  1.1223 +                attributes.addAttribute(attributeName,
  1.1224 +                        Portability.newEmptyString()
  1.1225 +                        // [NOCPP[
  1.1226 +                        , xmlnsPolicy
  1.1227 +                // ]NOCPP]
  1.1228 +                );
  1.1229 +                // [NOCPP[
  1.1230 +            }
  1.1231 +            // ]NOCPP]
  1.1232 +            attributeName = null; // attributeName has been adopted by the
  1.1233 +            // |attributes| object
  1.1234 +        }
  1.1235 +    }
  1.1236 +
  1.1237 +    private void addAttributeWithValue() throws SAXException { // adds the pending attribute name with the larger buffer as its value
  1.1238 +        // [NOCPP[
  1.1239 +        if (metaBoundaryPassed && ElementName.META == tagName
  1.1240 +                && AttributeName.CHARSET == attributeName) {
  1.1241 +            err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
  1.1242 +        }
  1.1243 +        // ]NOCPP]
  1.1244 +        if (attributeName != null) { // null e.g. after attributeNameComplete() dropped a duplicate
  1.1245 +            String val = longStrBufToString(); // Ownership transferred to
  1.1246 +            // HtmlAttributes
  1.1247 +            // CPPONLY: if (mViewSource) {
  1.1248 +            // CPPONLY:   mViewSource.MaybeLinkifyAttributeValue(attributeName, val);
  1.1249 +            // CPPONLY: }
  1.1250 +            // [NOCPP[
  1.1251 +            if (!endTag && html4 && html4ModeCompatibleWithXhtml1Schemata
  1.1252 +                    && attributeName.isCaseFolded()) {
  1.1253 +                val = newAsciiLowerCaseStringFromString(val); // A-Z in the value become a-z
  1.1254 +            }
  1.1255 +            // ]NOCPP]
  1.1256 +            attributes.addAttribute(attributeName, val
  1.1257 +            // [NOCPP[
  1.1258 +                    , xmlnsPolicy
  1.1259 +            // ]NOCPP]
  1.1260 +            );
  1.1261 +            attributeName = null; // attributeName has been adopted by the
  1.1262 +            // |attributes| object
  1.1263 +        }
  1.1264 +    }
  1.1265 +
  1.1266 +    // [NOCPP[
  1.1267 +
  1.1268 +    private static String newAsciiLowerCaseStringFromString(String str) {
  1.1269 +        if (str == null) {
  1.1270 +            return null;
  1.1271 +        }
  1.1272 +        char[] buf = new char[str.length()];
  1.1273 +        for (int i = 0; i < str.length(); i++) {
  1.1274 +            char c = str.charAt(i);
  1.1275 +            if (c >= 'A' && c <= 'Z') {
  1.1276 +                c += 0x20;
  1.1277 +            }
  1.1278 +            buf[i] = c;
  1.1279 +        }
  1.1280 +        return new String(buf);
  1.1281 +    }
  1.1282 +
  1.1283 +    protected void startErrorReporting() throws SAXException {
  1.1284 +        // Empty in this class. start() invokes it right after tokenHandler.startTokenization(this).
  1.1285 +    }
  1.1286 +
  1.1287 +    // ]NOCPP]
  1.1288 +    
  1.1289 +    public void start() throws SAXException { // initializes the tokenizer, then tells the token handler that tokenization starts
  1.1290 +        initializeWithoutStarting();
  1.1291 +        tokenHandler.startTokenization(this);
  1.1292 +        // [NOCPP[
  1.1293 +        startErrorReporting(); // inside a NOCPP span; the implementation in this class is empty
  1.1294 +        // ]NOCPP]
  1.1295 +    }
  1.1296 +
  1.1297 +    public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException { // runs the state loop over the unread part of buffer; returns lastCR
  1.1298 +        int state = stateSave;
  1.1299 +        int returnState = returnStateSave;
  1.1300 +        char c = '\u0000';
  1.1301 +        shouldSuspend = false;
  1.1302 +        lastCR = false;
  1.1303 +
  1.1304 +        int start = buffer.getStart();
  1.1305 +        /**
  1.1306 +         * The index of the last <code>char</code> read; one before <code>start</code> since the loop pre-increments it.
  1.1307 +         */
  1.1308 +        int pos = start - 1;
  1.1309 +
  1.1310 +        /**
  1.1311 +         * Sets the <code>cstart</code> field, the index of the first
  1.1312 +         * <code>char</code> of a coalesced run of character tokens: for the
  1.1313 +         * states listed it is <code>start</code>; for every other state it is
  1.1314 +         * <code>Integer.MAX_VALUE</code>, meaning no run is being coalesced.
  1.1315 +         */
  1.1316 +        switch (state) {
  1.1317 +            case DATA:
  1.1318 +            case RCDATA:
  1.1319 +            case SCRIPT_DATA:
  1.1320 +            case PLAINTEXT:
  1.1321 +            case RAWTEXT:
  1.1322 +            case CDATA_SECTION:
  1.1323 +            case SCRIPT_DATA_ESCAPED:
  1.1324 +            case SCRIPT_DATA_ESCAPE_START:
  1.1325 +            case SCRIPT_DATA_ESCAPE_START_DASH:
  1.1326 +            case SCRIPT_DATA_ESCAPED_DASH:
  1.1327 +            case SCRIPT_DATA_ESCAPED_DASH_DASH:
  1.1328 +            case SCRIPT_DATA_DOUBLE_ESCAPE_START:
  1.1329 +            case SCRIPT_DATA_DOUBLE_ESCAPED:
  1.1330 +            case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
  1.1331 +            case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
  1.1332 +            case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
  1.1333 +            case SCRIPT_DATA_DOUBLE_ESCAPE_END:
  1.1334 +                cstart = start;
  1.1335 +                break;
  1.1336 +            default:
  1.1337 +                cstart = Integer.MAX_VALUE;
  1.1338 +                break;
  1.1339 +        }
  1.1340 +
  1.1341 +        /**
  1.1342 +         * Runs the state loop from <code>pos + 1</code> up to, but not
  1.1343 +         * including, <code>buffer.getEnd()</code>. In this Java source only
  1.1344 +         * the call between the NOCPP markers is code; CPPONLY lines are comments.
  1.1345 +         */
  1.1346 +        // CPPONLY: if (mViewSource) {
  1.1347 +        // CPPONLY:   mViewSource.SetBuffer(buffer);
  1.1348 +        // CPPONLY:   pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
  1.1349 +        // CPPONLY:   mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
  1.1350 +        // CPPONLY: } else {
  1.1351 +        // CPPONLY:   pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
  1.1352 +        // CPPONLY: }
  1.1353 +        // [NOCPP[
  1.1354 +        pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
  1.1355 +                buffer.getEnd());
  1.1356 +        // ]NOCPP]
  1.1357 +        if (pos == buffer.getEnd()) {
  1.1358 +            // exiting due to end of buffer
  1.1359 +            buffer.setStart(pos);
  1.1360 +        } else {
  1.1361 +            buffer.setStart(pos + 1); // stopped early: the unread part begins right after pos
  1.1362 +        }
  1.1363 +        return lastCR; // per the CR handling note in stateLoop, true tells the IO driver to swallow a following LF
  1.1364 +    }
  1.1365 +
  1.1366 +    @SuppressWarnings("unused") private int stateLoop(int state, char c,
  1.1367 +            int pos, @NoLength char[] buf, boolean reconsume, int returnState,
  1.1368 +            int endPos) throws SAXException {
  1.1369 +        /*
  1.1370 +         * Idioms used in this code:
  1.1371 +         * 
  1.1372 +         * 
  1.1373 +         * Consuming the next input character
  1.1374 +         * 
  1.1375 +         * To consume the next input character, the code does this: if (++pos ==
  1.1376 +         * endPos) { break stateloop; } c = checkChar(buf, pos);
  1.1377 +         * 
  1.1378 +         * 
  1.1379 +         * Staying in a state
  1.1380 +         * 
  1.1381 +         * When there's a state that the tokenizer may stay in over multiple
  1.1382 +         * input characters, the state has a wrapper |for(;;)| loop and staying
  1.1383 +         * in the state continues the loop.
  1.1384 +         * 
  1.1385 +         * 
  1.1386 +         * Switching to another state
  1.1387 +         * 
  1.1388 +         * To switch to another state, the code sets the state variable to the
  1.1389 +         * magic number of the new state. Then it either continues stateloop or
  1.1390 +         * breaks out of the state's own wrapper loop if the target state is
  1.1391 +         * right after the current state in source order. (This is a partial
  1.1392 +         * workaround for Java's lack of goto.)
  1.1393 +         * 
  1.1394 +         * 
  1.1395 +         * Reconsume support
  1.1396 +         * 
  1.1397 +         * The spec sometimes says that an input character is reconsumed in
  1.1398 +         * another state. If a state can ever be entered so that an input
  1.1399 +         * character can be reconsumed in it, the state's code starts with an
  1.1400 +         * |if (reconsume)| that sets reconsume to false and skips over the
  1.1401 +         * normal code for consuming a new character.
  1.1402 +         * 
  1.1403 +         * To reconsume the current character in another state, the code sets
  1.1404 +         * |reconsume| to true and then switches to the other state.
  1.1405 +         * 
  1.1406 +         * 
  1.1407 +         * Emitting character tokens
  1.1408 +         * 
  1.1409 +         * This method emits character tokens lazily. Whenever a new range of
  1.1410 +         * character tokens starts, the field cstart must be set to the start
  1.1411 +         * index of the range. The flushChars() method must be called at the end
  1.1412 +         * of a range to flush it.
  1.1413 +         * 
  1.1414 +         * 
  1.1415 +         * U+0000 handling
  1.1416 +         * 
  1.1417 +         * The various states have to handle the replacement of U+0000 with
  1.1418 +         * U+FFFD. However, if U+0000 would be reconsumed in another state, the
  1.1419 +         * replacement doesn't need to happen, because it's handled by the
  1.1420 +         * reconsuming state.
  1.1421 +         * 
  1.1422 +         * 
  1.1423 +         * LF handling
  1.1424 +         * 
  1.1425 +         * Every state needs to increment the line number upon LF unless the LF
  1.1426 +         * gets reconsumed by another state which increments the line number.
  1.1427 +         * 
  1.1428 +         * 
  1.1429 +         * CR handling
  1.1430 +         * 
  1.1431 +         * Every state needs to handle CR unless the CR gets reconsumed and is
  1.1432 +         * handled by the reconsuming state. The CR needs to be handled as if it
  1.1433 +         * were an LF, the lastCR field must be set to true and then this
  1.1434 +         * method must return. The IO driver will then swallow the next
  1.1435 +         * character if it is an LF to coalesce CRLF.
  1.1436 +         */
  1.1437 +        stateloop: for (;;) {
  1.1438 +            switch (state) {
  1.1439 +                case DATA:
  1.1440 +                    dataloop: for (;;) {
  1.1441 +                        if (reconsume) {
  1.1442 +                            reconsume = false;
  1.1443 +                        } else {
  1.1444 +                            if (++pos == endPos) {
  1.1445 +                                break stateloop;
  1.1446 +                            }
  1.1447 +                            c = checkChar(buf, pos);
  1.1448 +                        }
  1.1449 +                        switch (c) {
  1.1450 +                            case '&':
  1.1451 +                                /*
  1.1452 +                                 * U+0026 AMPERSAND (&) Switch to the character
  1.1453 +                                 * reference in data state.
  1.1454 +                                 */
  1.1455 +                                flushChars(buf, pos);
  1.1456 +                                clearStrBufAndAppend(c);
  1.1457 +                                setAdditionalAndRememberAmpersandLocation('\u0000');
  1.1458 +                                returnState = state;
  1.1459 +                                state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
  1.1460 +                                continue stateloop;
  1.1461 +                            case '<':
  1.1462 +                                /*
  1.1463 +                                 * U+003C LESS-THAN SIGN (<) Switch to the tag
  1.1464 +                                 * open state.
  1.1465 +                                 */
  1.1466 +                                flushChars(buf, pos);
  1.1467 +
  1.1468 +                                state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
  1.1469 +                                break dataloop; // FALL THROUGH continue
  1.1470 +                            // stateloop;
  1.1471 +                            case '\u0000':
  1.1472 +                                emitReplacementCharacter(buf, pos);
  1.1473 +                                continue;
  1.1474 +                            case '\r':
  1.1475 +                                emitCarriageReturn(buf, pos);
  1.1476 +                                break stateloop;
  1.1477 +                            case '\n':
  1.1478 +                                silentLineFeed();
  1.1479 +                            default:
  1.1480 +                                /*
  1.1481 +                                 * Anything else Emit the input character as a
  1.1482 +                                 * character token.
  1.1483 +                                 * 
  1.1484 +                                 * Stay in the data state.
  1.1485 +                                 */
  1.1486 +                                continue;
  1.1487 +                        }
  1.1488 +                    }
  1.1489 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.1490 +                case TAG_OPEN:
  1.1491 +                    tagopenloop: for (;;) {
  1.1492 +                        /*
  1.1493 +                         * The behavior of this state depends on the content
  1.1494 +                         * model flag.
  1.1495 +                         */
  1.1496 +                        if (++pos == endPos) {
  1.1497 +                            break stateloop;
  1.1498 +                        }
  1.1499 +                        c = checkChar(buf, pos);
  1.1500 +                        /*
  1.1501 +                         * If the content model flag is set to the PCDATA state
  1.1502 +                         * Consume the next input character:
  1.1503 +                         */
  1.1504 +                        if (c >= 'A' && c <= 'Z') {
  1.1505 +                            /*
  1.1506 +                             * U+0041 LATIN CAPITAL LETTER A through to U+005A
  1.1507 +                             * LATIN CAPITAL LETTER Z Create a new start tag
  1.1508 +                             * token,
  1.1509 +                             */
  1.1510 +                            endTag = false;
  1.1511 +                            /*
  1.1512 +                             * set its tag name to the lowercase version of the
  1.1513 +                             * input character (add 0x0020 to the character's
  1.1514 +                             * code point),
  1.1515 +                             */
  1.1516 +                            clearStrBufAndAppend((char) (c + 0x20));
  1.1517 +                            /* then switch to the tag name state. */
  1.1518 +                            state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
  1.1519 +                            /*
  1.1520 +                             * (Don't emit the token yet; further details will
  1.1521 +                             * be filled in before it is emitted.)
  1.1522 +                             */
  1.1523 +                            break tagopenloop;
  1.1524 +                            // continue stateloop;
  1.1525 +                        } else if (c >= 'a' && c <= 'z') {
  1.1526 +                            /*
  1.1527 +                             * U+0061 LATIN SMALL LETTER A through to U+007A
  1.1528 +                             * LATIN SMALL LETTER Z Create a new start tag
  1.1529 +                             * token,
  1.1530 +                             */
  1.1531 +                            endTag = false;
  1.1532 +                            /*
  1.1533 +                             * set its tag name to the input character,
  1.1534 +                             */
  1.1535 +                            clearStrBufAndAppend(c);
  1.1536 +                            /* then switch to the tag name state. */
  1.1537 +                            state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
  1.1538 +                            /*
  1.1539 +                             * (Don't emit the token yet; further details will
  1.1540 +                             * be filled in before it is emitted.)
  1.1541 +                             */
  1.1542 +                            break tagopenloop;
  1.1543 +                            // continue stateloop;
  1.1544 +                        }
  1.1545 +                        switch (c) {
  1.1546 +                            case '!':
  1.1547 +                                /*
  1.1548 +                                 * U+0021 EXCLAMATION MARK (!) Switch to the
  1.1549 +                                 * markup declaration open state.
  1.1550 +                                 */
  1.1551 +                                state = transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos);
  1.1552 +                                continue stateloop;
  1.1553 +                            case '/':
  1.1554 +                                /*
  1.1555 +                                 * U+002F SOLIDUS (/) Switch to the close tag
  1.1556 +                                 * open state.
  1.1557 +                                 */
  1.1558 +                                state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos);
  1.1559 +                                continue stateloop;
  1.1560 +                            case '?':
  1.1561 +                                // CPPONLY: if (viewingXmlSource) {
  1.1562 +                                // CPPONLY: state = transition(state,
  1.1563 +                                // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION,
  1.1564 +                                // CPPONLY: reconsume,
  1.1565 +                                // CPPONLY: pos);
  1.1566 +                                // CPPONLY: continue stateloop;
  1.1567 +                                // CPPONLY: }
  1.1568 +                                /*
  1.1569 +                                 * U+003F QUESTION MARK (?) Parse error.
  1.1570 +                                 */
  1.1571 +                                errProcessingInstruction();
  1.1572 +                                /*
  1.1573 +                                 * Switch to the bogus comment state.
  1.1574 +                                 */
  1.1575 +                                clearLongStrBufAndAppend(c);
  1.1576 +                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.1577 +                                continue stateloop;
  1.1578 +                            case '>':
  1.1579 +                                /*
  1.1580 +                                 * U+003E GREATER-THAN SIGN (>) Parse error.
  1.1581 +                                 */
  1.1582 +                                errLtGt();
  1.1583 +                                /*
  1.1584 +                                 * Emit a U+003C LESS-THAN SIGN character token
  1.1585 +                                 * and a U+003E GREATER-THAN SIGN character
  1.1586 +                                 * token.
  1.1587 +                                 */
  1.1588 +                                tokenHandler.characters(Tokenizer.LT_GT, 0, 2);
  1.1589 +                                /* Switch to the data state. */
  1.1590 +                                cstart = pos + 1;
  1.1591 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.1592 +                                continue stateloop;
  1.1593 +                            default:
  1.1594 +                                /*
  1.1595 +                                 * Anything else Parse error.
  1.1596 +                                 */
  1.1597 +                                errBadCharAfterLt(c);
  1.1598 +                                /*
  1.1599 +                                 * Emit a U+003C LESS-THAN SIGN character token
  1.1600 +                                 */
  1.1601 +                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
  1.1602 +                                /*
  1.1603 +                                 * and reconsume the current input character in
  1.1604 +                                 * the data state.
  1.1605 +                                 */
  1.1606 +                                cstart = pos;
  1.1607 +                                reconsume = true;
  1.1608 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.1609 +                                continue stateloop;
  1.1610 +                        }
  1.1611 +                    }
  1.1612 +                    // FALL THROUGH DON'T REORDER
  1.1613 +                case TAG_NAME:
  1.1614 +                    tagnameloop: for (;;) {
  1.1615 +                        if (++pos == endPos) {
  1.1616 +                            break stateloop;
  1.1617 +                        }
  1.1618 +                        c = checkChar(buf, pos);
  1.1619 +                        /*
  1.1620 +                         * Consume the next input character:
  1.1621 +                         */
  1.1622 +                        switch (c) {
  1.1623 +                            case '\r':
  1.1624 +                                silentCarriageReturn();
  1.1625 +                                strBufToElementNameString();
  1.1626 +                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
  1.1627 +                                break stateloop;
  1.1628 +                            case '\n':
  1.1629 +                                silentLineFeed();
  1.1630 +                            case ' ':
  1.1631 +                            case '\t':
  1.1632 +                            case '\u000C':
  1.1633 +                                /*
  1.1634 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.1635 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.1636 +                                 * Switch to the before attribute name state.
  1.1637 +                                 */
  1.1638 +                                strBufToElementNameString();
  1.1639 +                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
  1.1640 +                                break tagnameloop;
  1.1641 +                            // continue stateloop;
  1.1642 +                            case '/':
  1.1643 +                                /*
  1.1644 +                                 * U+002F SOLIDUS (/) Switch to the self-closing
  1.1645 +                                 * start tag state.
  1.1646 +                                 */
  1.1647 +                                strBufToElementNameString();
  1.1648 +                                state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
  1.1649 +                                continue stateloop;
  1.1650 +                            case '>':
  1.1651 +                                /*
  1.1652 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.1653 +                                 * tag token.
  1.1654 +                                 */
  1.1655 +                                strBufToElementNameString();
  1.1656 +                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
  1.1657 +                                if (shouldSuspend) {
  1.1658 +                                    break stateloop;
  1.1659 +                                }
  1.1660 +                                /*
  1.1661 +                                 * Switch to the data state.
  1.1662 +                                 */
  1.1663 +                                continue stateloop;
  1.1664 +                            case '\u0000':
  1.1665 +                                c = '\uFFFD';
  1.1666 +                                // fall thru
  1.1667 +                            default:
  1.1668 +                                if (c >= 'A' && c <= 'Z') {
  1.1669 +                                    /*
  1.1670 +                                     * U+0041 LATIN CAPITAL LETTER A through to
  1.1671 +                                     * U+005A LATIN CAPITAL LETTER Z Append the
  1.1672 +                                     * lowercase version of the current input
  1.1673 +                                     * character (add 0x0020 to the character's
  1.1674 +                                     * code point) to the current tag token's
  1.1675 +                                     * tag name.
  1.1676 +                                     */
  1.1677 +                                    c += 0x20;
  1.1678 +                                }
  1.1679 +                                /*
  1.1680 +                                 * Anything else Append the current input
  1.1681 +                                 * character to the current tag token's tag
  1.1682 +                                 * name.
  1.1683 +                                 */
  1.1684 +                                appendStrBuf(c);
  1.1685 +                                /*
  1.1686 +                                 * Stay in the tag name state.
  1.1687 +                                 */
  1.1688 +                                continue;
  1.1689 +                        }
  1.1690 +                    }
  1.1691 +                    // FALLTHRU DON'T REORDER
  1.1692 +                case BEFORE_ATTRIBUTE_NAME:
  1.1693 +                    beforeattributenameloop: for (;;) {
  1.1694 +                        if (reconsume) {
  1.1695 +                            reconsume = false;
  1.1696 +                        } else {
  1.1697 +                            if (++pos == endPos) {
  1.1698 +                                break stateloop;
  1.1699 +                            }
  1.1700 +                            c = checkChar(buf, pos);
  1.1701 +                        }
  1.1702 +                        /*
  1.1703 +                         * Consume the next input character:
  1.1704 +                         */
  1.1705 +                        switch (c) {
  1.1706 +                            case '\r':
  1.1707 +                                silentCarriageReturn();
  1.1708 +                                break stateloop;
  1.1709 +                            case '\n':
  1.1710 +                                silentLineFeed();
  1.1711 +                                // fall thru
  1.1712 +                            case ' ':
  1.1713 +                            case '\t':
  1.1714 +                            case '\u000C':
  1.1715 +                                /*
  1.1716 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.1717 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
  1.1718 +                                 * in the before attribute name state.
  1.1719 +                                 */
  1.1720 +                                continue;
  1.1721 +                            case '/':
  1.1722 +                                /*
  1.1723 +                                 * U+002F SOLIDUS (/) Switch to the self-closing
  1.1724 +                                 * start tag state.
  1.1725 +                                 */
  1.1726 +                                state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
  1.1727 +                                continue stateloop;
  1.1728 +                            case '>':
  1.1729 +                                /*
  1.1730 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.1731 +                                 * tag token.
  1.1732 +                                 */
  1.1733 +                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
  1.1734 +                                if (shouldSuspend) {
  1.1735 +                                    break stateloop;
  1.1736 +                                }
  1.1737 +                                /*
  1.1738 +                                 * Switch to the data state.
  1.1739 +                                 */
  1.1740 +                                continue stateloop;
  1.1741 +                            case '\u0000':
  1.1742 +                                c = '\uFFFD';
  1.1743 +                                // fall thru
  1.1744 +                            case '\"':
  1.1745 +                            case '\'':
  1.1746 +                            case '<':
  1.1747 +                            case '=':
  1.1748 +                                /*
  1.1749 +                                 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
  1.1750 +                                 * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
  1.1751 +                                 * SIGN (=) Parse error.
  1.1752 +                                 */
  1.1753 +                                errBadCharBeforeAttributeNameOrNull(c);
  1.1754 +                                /*
  1.1755 +                                 * Treat it as per the "anything else" entry
  1.1756 +                                 * below.
  1.1757 +                                 */
  1.1758 +                            default:
  1.1759 +                                /*
  1.1760 +                                 * Anything else Start a new attribute in the
  1.1761 +                                 * current tag token.
  1.1762 +                                 */
  1.1763 +                                if (c >= 'A' && c <= 'Z') {
  1.1764 +                                    /*
  1.1765 +                                     * U+0041 LATIN CAPITAL LETTER A through to
  1.1766 +                                     * U+005A LATIN CAPITAL LETTER Z Set that
  1.1767 +                                     * attribute's name to the lowercase version
  1.1768 +                                     * of the current input character (add
  1.1769 +                                     * 0x0020 to the character's code point)
  1.1770 +                                     */
  1.1771 +                                    c += 0x20;
  1.1772 +                                }
  1.1773 +                                /*
  1.1774 +                                 * Set that attribute's name to the current
  1.1775 +                                 * input character,
  1.1776 +                                 */
  1.1777 +                                clearStrBufAndAppend(c);
  1.1778 +                                /*
  1.1779 +                                 * and its value to the empty string.
  1.1780 +                                 */
  1.1781 +                                // Will do later.
  1.1782 +                                /*
  1.1783 +                                 * Switch to the attribute name state.
  1.1784 +                                 */
  1.1785 +                                state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
  1.1786 +                                break beforeattributenameloop;
  1.1787 +                            // continue stateloop;
  1.1788 +                        }
  1.1789 +                    }
  1.1790 +                    // FALLTHRU DON'T REORDER
  1.1791 +                case ATTRIBUTE_NAME:
  1.1792 +                    attributenameloop: for (;;) {
  1.1793 +                        if (++pos == endPos) {
  1.1794 +                            break stateloop;
  1.1795 +                        }
  1.1796 +                        c = checkChar(buf, pos);
  1.1797 +                        /*
  1.1798 +                         * Consume the next input character:
  1.1799 +                         */
  1.1800 +                        switch (c) {
  1.1801 +                            case '\r':
  1.1802 +                                silentCarriageReturn();
  1.1803 +                                attributeNameComplete();
  1.1804 +                                state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
  1.1805 +                                break stateloop;
  1.1806 +                            case '\n':
  1.1807 +                                silentLineFeed();
  1.1808 +                                // fall thru
  1.1809 +                            case ' ':
  1.1810 +                            case '\t':
  1.1811 +                            case '\u000C':
  1.1812 +                                /*
  1.1813 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.1814 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.1815 +                                 * Switch to the after attribute name state.
  1.1816 +                                 */
  1.1817 +                                attributeNameComplete();
  1.1818 +                                state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos);
  1.1819 +                                continue stateloop;
  1.1820 +                            case '/':
  1.1821 +                                /*
  1.1822 +                                 * U+002F SOLIDUS (/) Switch to the self-closing
  1.1823 +                                 * start tag state.
  1.1824 +                                 */
  1.1825 +                                attributeNameComplete();
  1.1826 +                                addAttributeWithoutValue();
  1.1827 +                                state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
  1.1828 +                                continue stateloop;
  1.1829 +                            case '=':
  1.1830 +                                /*
  1.1831 +                                 * U+003D EQUALS SIGN (=) Switch to the before
  1.1832 +                                 * attribute value state.
  1.1833 +                                 */
  1.1834 +                                attributeNameComplete();
  1.1835 +                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
  1.1836 +                                break attributenameloop;
  1.1837 +                            // continue stateloop;
  1.1838 +                            case '>':
  1.1839 +                                /*
  1.1840 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.1841 +                                 * tag token.
  1.1842 +                                 */
  1.1843 +                                attributeNameComplete();
  1.1844 +                                addAttributeWithoutValue();
  1.1845 +                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
  1.1846 +                                if (shouldSuspend) {
  1.1847 +                                    break stateloop;
  1.1848 +                                }
  1.1849 +                                /*
  1.1850 +                                 * Switch to the data state.
  1.1851 +                                 */
  1.1852 +                                continue stateloop;
  1.1853 +                            case '\u0000':
  1.1854 +                                c = '\uFFFD';
  1.1855 +                                // fall thru
  1.1856 +                            case '\"':
  1.1857 +                            case '\'':
  1.1858 +                            case '<':
  1.1859 +                                /*
  1.1860 +                                 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
  1.1861 +                                 * (') U+003C LESS-THAN SIGN (<) Parse error.
  1.1862 +                                 */
  1.1863 +                                errQuoteOrLtInAttributeNameOrNull(c);
  1.1864 +                                /*
  1.1865 +                                 * Treat it as per the "anything else" entry
  1.1866 +                                 * below.
  1.1867 +                                 */
  1.1868 +                            default:
  1.1869 +                                if (c >= 'A' && c <= 'Z') {
  1.1870 +                                    /*
  1.1871 +                                     * U+0041 LATIN CAPITAL LETTER A through to
  1.1872 +                                     * U+005A LATIN CAPITAL LETTER Z Append the
  1.1873 +                                     * lowercase version of the current input
  1.1874 +                                     * character (add 0x0020 to the character's
  1.1875 +                                     * code point) to the current attribute's
  1.1876 +                                     * name.
  1.1877 +                                     */
  1.1878 +                                    c += 0x20;
  1.1879 +                                }
  1.1880 +                                /*
  1.1881 +                                 * Anything else Append the current input
  1.1882 +                                 * character to the current attribute's name.
  1.1883 +                                 */
  1.1884 +                                appendStrBuf(c);
  1.1885 +                                /*
  1.1886 +                                 * Stay in the attribute name state.
  1.1887 +                                 */
  1.1888 +                                continue;
  1.1889 +                        }
  1.1890 +                    }
  1.1891 +                    // FALLTHRU DON'T REORDER
  1.1892 +                case BEFORE_ATTRIBUTE_VALUE:
  1.1893 +                    beforeattributevalueloop: for (;;) {
  1.1894 +                        if (++pos == endPos) {
  1.1895 +                            break stateloop;
  1.1896 +                        }
  1.1897 +                        c = checkChar(buf, pos);
  1.1898 +                        /*
  1.1899 +                         * Consume the next input character:
  1.1900 +                         */
  1.1901 +                        switch (c) {
  1.1902 +                            case '\r':
  1.1903 +                                silentCarriageReturn();
  1.1904 +                                break stateloop;
  1.1905 +                            case '\n':
  1.1906 +                                silentLineFeed();
  1.1907 +                                // fall thru
  1.1908 +                            case ' ':
  1.1909 +                            case '\t':
  1.1910 +                            case '\u000C':
  1.1911 +                                /*
  1.1912 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.1913 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
  1.1914 +                                 * in the before attribute value state.
  1.1915 +                                 */
  1.1916 +                                continue;
  1.1917 +                            case '"':
  1.1918 +                                /*
  1.1919 +                                 * U+0022 QUOTATION MARK (") Switch to the
  1.1920 +                                 * attribute value (double-quoted) state.
  1.1921 +                                 */
  1.1922 +                                clearLongStrBuf();
  1.1923 +                                state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
  1.1924 +                                break beforeattributevalueloop;
  1.1925 +                            // continue stateloop;
  1.1926 +                            case '&':
  1.1927 +                                /*
  1.1928 +                                 * U+0026 AMPERSAND (&) Switch to the attribute
  1.1929 +                                 * value (unquoted) state and reconsume this
  1.1930 +                                 * input character.
  1.1931 +                                 */
  1.1932 +                                clearLongStrBuf();
  1.1933 +                                reconsume = true;
  1.1934 +                                state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
  1.1935 +                                noteUnquotedAttributeValue();
  1.1936 +                                continue stateloop;
  1.1937 +                            case '\'':
  1.1938 +                                /*
  1.1939 +                                 * U+0027 APOSTROPHE (') Switch to the attribute
  1.1940 +                                 * value (single-quoted) state.
  1.1941 +                                 */
  1.1942 +                                clearLongStrBuf();
  1.1943 +                                state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
  1.1944 +                                continue stateloop;
  1.1945 +                            case '>':
  1.1946 +                                /*
  1.1947 +                                 * U+003E GREATER-THAN SIGN (>) Parse error.
  1.1948 +                                 */
  1.1949 +                                errAttributeValueMissing();
  1.1950 +                                /*
  1.1951 +                                 * Emit the current tag token.
  1.1952 +                                 */
  1.1953 +                                addAttributeWithoutValue();
  1.1954 +                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
  1.1955 +                                if (shouldSuspend) {
  1.1956 +                                    break stateloop;
  1.1957 +                                }
  1.1958 +                                /*
  1.1959 +                                 * Switch to the data state.
  1.1960 +                                 */
  1.1961 +                                continue stateloop;
  1.1962 +                            case '\u0000':
  1.1963 +                                c = '\uFFFD';
  1.1964 +                                // fall thru
  1.1965 +                            case '<':
  1.1966 +                            case '=':
  1.1967 +                            case '`':
  1.1968 +                                /*
  1.1969 +                                 * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
  1.1970 +                                 * (=) U+0060 GRAVE ACCENT (`) Parse error.
  1.1971 +                                 */
  1.1972 +                                errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
  1.1973 +                                /*
  1.1974 +                                 * Treat it as per the "anything else" entry
  1.1975 +                                 * below.
  1.1976 +                                 */
  1.1977 +                            default:
  1.1978 +                                // [NOCPP[
  1.1979 +                                errHtml4NonNameInUnquotedAttribute(c);
  1.1980 +                                // ]NOCPP]
  1.1981 +                                /*
  1.1982 +                                 * Anything else Append the current input
  1.1983 +                                 * character to the current attribute's value.
  1.1984 +                                 */
  1.1985 +                                clearLongStrBufAndAppend(c);
  1.1986 +                                /*
  1.1987 +                                 * Switch to the attribute value (unquoted)
  1.1988 +                                 * state.
  1.1989 +                                 */
  1.1990 +
  1.1991 +                                state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
  1.1992 +                                noteUnquotedAttributeValue();
  1.1993 +                                continue stateloop;
  1.1994 +                        }
  1.1995 +                    }
  1.1996 +                    // FALLTHRU DON'T REORDER
  1.1997 +                case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
  1.1998 +                    attributevaluedoublequotedloop: for (;;) {
  1.1999 +                        if (reconsume) {
  1.2000 +                            reconsume = false;
  1.2001 +                        } else {
  1.2002 +                            if (++pos == endPos) {
  1.2003 +                                break stateloop;
  1.2004 +                            }
  1.2005 +                            c = checkChar(buf, pos);
  1.2006 +                        }
  1.2007 +                        /*
  1.2008 +                         * Consume the next input character:
  1.2009 +                         */
  1.2010 +                        switch (c) {
  1.2011 +                            case '"':
  1.2012 +                                /*
  1.2013 +                                 * U+0022 QUOTATION MARK (") Switch to the after
  1.2014 +                                 * attribute value (quoted) state.
  1.2015 +                                 */
  1.2016 +                                addAttributeWithValue();
  1.2017 +
  1.2018 +                                state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
  1.2019 +                                break attributevaluedoublequotedloop;
  1.2020 +                            // continue stateloop;
  1.2021 +                            case '&':
  1.2022 +                                /*
  1.2023 +                                 * U+0026 AMPERSAND (&) Switch to the character
  1.2024 +                                 * reference in attribute value state, with the
  1.2025 +                                 * additional allowed character being U+0022
  1.2026 +                                 * QUOTATION MARK (").
  1.2027 +                                 */
  1.2028 +                                clearStrBufAndAppend(c);
  1.2029 +                                setAdditionalAndRememberAmpersandLocation('\"');
  1.2030 +                                returnState = state;
  1.2031 +                                state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
  1.2032 +                                continue stateloop;
  1.2033 +                            case '\r':
  1.2034 +                                appendLongStrBufCarriageReturn();
  1.2035 +                                break stateloop;
  1.2036 +                            case '\n':
  1.2037 +                                appendLongStrBufLineFeed();
  1.2038 +                                continue;
  1.2039 +                            case '\u0000':
  1.2040 +                                c = '\uFFFD';
  1.2041 +                                // fall thru
  1.2042 +                            default:
  1.2043 +                                /*
  1.2044 +                                 * Anything else Append the current input
  1.2045 +                                 * character to the current attribute's value.
  1.2046 +                                 */
  1.2047 +                                appendLongStrBuf(c);
  1.2048 +                                /*
  1.2049 +                                 * Stay in the attribute value (double-quoted)
  1.2050 +                                 * state.
  1.2051 +                                 */
  1.2052 +                                continue;
  1.2053 +                        }
  1.2054 +                    }
  1.2055 +                    // FALLTHRU DON'T REORDER
  1.2056 +                case AFTER_ATTRIBUTE_VALUE_QUOTED:
  1.2057 +                    afterattributevaluequotedloop: for (;;) {
  1.2058 +                        if (++pos == endPos) {
  1.2059 +                            break stateloop;
  1.2060 +                        }
  1.2061 +                        c = checkChar(buf, pos);
  1.2062 +                        /*
  1.2063 +                         * Consume the next input character:
  1.2064 +                         */
  1.2065 +                        switch (c) {
  1.2066 +                            case '\r':
  1.2067 +                                silentCarriageReturn();
  1.2068 +                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
  1.2069 +                                break stateloop;
  1.2070 +                            case '\n':
  1.2071 +                                silentLineFeed();
  1.2072 +                                // fall thru
  1.2073 +                            case ' ':
  1.2074 +                            case '\t':
  1.2075 +                            case '\u000C':
  1.2076 +                                /*
  1.2077 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.2078 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.2079 +                                 * Switch to the before attribute name state.
  1.2080 +                                 */
  1.2081 +                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
  1.2082 +                                continue stateloop;
  1.2083 +                            case '/':
  1.2084 +                                /*
  1.2085 +                                 * U+002F SOLIDUS (/) Switch to the self-closing
  1.2086 +                                 * start tag state.
  1.2087 +                                 */
  1.2088 +                                state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
  1.2089 +                                break afterattributevaluequotedloop;
  1.2090 +                            // continue stateloop;
  1.2091 +                            case '>':
  1.2092 +                                /*
  1.2093 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.2094 +                                 * tag token.
  1.2095 +                                 */
  1.2096 +                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
  1.2097 +                                if (shouldSuspend) {
  1.2098 +                                    break stateloop;
  1.2099 +                                }
  1.2100 +                                /*
  1.2101 +                                 * Switch to the data state.
  1.2102 +                                 */
  1.2103 +                                continue stateloop;
  1.2104 +                            default:
  1.2105 +                                /*
  1.2106 +                                 * Anything else Parse error.
  1.2107 +                                 */
  1.2108 +                                errNoSpaceBetweenAttributes();
  1.2109 +                                /*
  1.2110 +                                 * Reconsume the character in the before
  1.2111 +                                 * attribute name state.
  1.2112 +                                 */
  1.2113 +                                reconsume = true;
  1.2114 +                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
  1.2115 +                                continue stateloop;
  1.2116 +                        }
  1.2117 +                    }
  1.2118 +                    // FALLTHRU DON'T REORDER
  1.2119 +                case SELF_CLOSING_START_TAG:
  1.2120 +                    if (++pos == endPos) {
  1.2121 +                        break stateloop;
  1.2122 +                    }
  1.2123 +                    c = checkChar(buf, pos);
  1.2124 +                    /*
  1.2125 +                     * Consume the next input character:
  1.2126 +                     */
  1.2127 +                    switch (c) {
  1.2128 +                        case '>':
  1.2129 +                            /*
  1.2130 +                             * U+003E GREATER-THAN SIGN (>) Set the self-closing
  1.2131 +                             * flag of the current tag token. Emit the current
  1.2132 +                             * tag token.
  1.2133 +                             */
  1.2134 +                            // [NOCPP[
  1.2135 +                            errHtml4XmlVoidSyntax();
  1.2136 +                            // ]NOCPP]
  1.2137 +                            state = transition(state, emitCurrentTagToken(true, pos), reconsume, pos);
  1.2138 +                            if (shouldSuspend) {
  1.2139 +                                break stateloop;
  1.2140 +                            }
  1.2141 +                            /*
  1.2142 +                             * Switch to the data state.
  1.2143 +                             */
  1.2144 +                            continue stateloop;
  1.2145 +                        default:
  1.2146 +                            /* Anything else Parse error. */
  1.2147 +                            errSlashNotFollowedByGt();
  1.2148 +                            /*
  1.2149 +                             * Reconsume the character in the before attribute
  1.2150 +                             * name state.
  1.2151 +                             */
  1.2152 +                            reconsume = true;
  1.2153 +                            state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
  1.2154 +                            continue stateloop;
  1.2155 +                    }
  1.2156 +                    // XXX reorder point
  1.2157 +                case ATTRIBUTE_VALUE_UNQUOTED:
  1.2158 +                    for (;;) {
  1.2159 +                        if (reconsume) {
  1.2160 +                            reconsume = false;
  1.2161 +                        } else {
  1.2162 +                            if (++pos == endPos) {
  1.2163 +                                break stateloop;
  1.2164 +                            }
  1.2165 +                            c = checkChar(buf, pos);
  1.2166 +                        }
  1.2167 +                        /*
  1.2168 +                         * Consume the next input character:
  1.2169 +                         */
  1.2170 +                        switch (c) {
  1.2171 +                            case '\r':
  1.2172 +                                silentCarriageReturn();
  1.2173 +                                addAttributeWithValue();
  1.2174 +                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
  1.2175 +                                break stateloop;
  1.2176 +                            case '\n':
  1.2177 +                                silentLineFeed();
  1.2178 +                                // fall thru
  1.2179 +                            case ' ':
  1.2180 +                            case '\t':
  1.2181 +                            case '\u000C':
  1.2182 +                                /*
  1.2183 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.2184 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.2185 +                                 * Switch to the before attribute name state.
  1.2186 +                                 */
  1.2187 +                                addAttributeWithValue();
  1.2188 +                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
  1.2189 +                                continue stateloop;
  1.2190 +                            case '&':
  1.2191 +                                /*
  1.2192 +                                 * U+0026 AMPERSAND (&) Switch to the character
  1.2193 +                                 * reference in attribute value state, with the
  1.2194 +                                 * additional allowed character being U+003E
  1.2195 +                                 * GREATER-THAN SIGN (>)
  1.2196 +                                 */
  1.2197 +                                clearStrBufAndAppend(c);
  1.2198 +                                setAdditionalAndRememberAmpersandLocation('>');
  1.2199 +                                returnState = state;
  1.2200 +                                state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
  1.2201 +                                continue stateloop;
  1.2202 +                            case '>':
  1.2203 +                                /*
  1.2204 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.2205 +                                 * tag token.
  1.2206 +                                 */
  1.2207 +                                addAttributeWithValue();
  1.2208 +                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
  1.2209 +                                if (shouldSuspend) {
  1.2210 +                                    break stateloop;
  1.2211 +                                }
  1.2212 +                                /*
  1.2213 +                                 * Switch to the data state.
  1.2214 +                                 */
  1.2215 +                                continue stateloop;
  1.2216 +                            case '\u0000':
  1.2217 +                                c = '\uFFFD';
  1.2218 +                                // fall thru
  1.2219 +                            case '<':
  1.2220 +                            case '\"':
  1.2221 +                            case '\'':
  1.2222 +                            case '=':
  1.2223 +                            case '`':
  1.2224 +                                /*
  1.2225 +                                 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
  1.2226 +                                 * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
  1.2227 +                                 * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
  1.2228 +                                 */
  1.2229 +                                errUnquotedAttributeValOrNull(c);
  1.2230 +                                /*
  1.2231 +                                 * Treat it as per the "anything else" entry
  1.2232 +                                 * below.
  1.2233 +                                 */
  1.2234 +                                // fall through
  1.2235 +                            default:
  1.2236 +                                // [NOCPP[
  1.2237 +                                errHtml4NonNameInUnquotedAttribute(c);
  1.2238 +                                // ]NOCPP]
  1.2239 +                                /*
  1.2240 +                                 * Anything else Append the current input
  1.2241 +                                 * character to the current attribute's value.
  1.2242 +                                 */
  1.2243 +                                appendLongStrBuf(c);
  1.2244 +                                /*
  1.2245 +                                 * Stay in the attribute value (unquoted) state.
  1.2246 +                                 */
  1.2247 +                                continue;
  1.2248 +                        }
  1.2249 +                    }
  1.2250 +                    // XXX reorder point
  1.2251 +                case AFTER_ATTRIBUTE_NAME:
  1.2252 +                    for (;;) {
  1.2253 +                        if (++pos == endPos) {
  1.2254 +                            break stateloop;
  1.2255 +                        }
  1.2256 +                        c = checkChar(buf, pos);
  1.2257 +                        /*
  1.2258 +                         * Consume the next input character:
  1.2259 +                         */
  1.2260 +                        switch (c) {
  1.2261 +                            case '\r':
  1.2262 +                                silentCarriageReturn();
  1.2263 +                                break stateloop;
  1.2264 +                            case '\n':
  1.2265 +                                silentLineFeed();
  1.2266 +                                // fall thru
  1.2267 +                            case ' ':
  1.2268 +                            case '\t':
  1.2269 +                            case '\u000C':
  1.2270 +                                /*
  1.2271 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.2272 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
  1.2273 +                                 * in the after attribute name state.
  1.2274 +                                 */
  1.2275 +                                continue;
  1.2276 +                            case '/':
  1.2277 +                                /*
  1.2278 +                                 * U+002F SOLIDUS (/) Switch to the self-closing
  1.2279 +                                 * start tag state.
  1.2280 +                                 */
  1.2281 +                                addAttributeWithoutValue();
  1.2282 +                                state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
  1.2283 +                                continue stateloop;
  1.2284 +                            case '=':
  1.2285 +                                /*
  1.2286 +                                 * U+003D EQUALS SIGN (=) Switch to the before
  1.2287 +                                 * attribute value state.
  1.2288 +                                 */
  1.2289 +                                state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos);
  1.2290 +                                continue stateloop;
  1.2291 +                            case '>':
  1.2292 +                                /*
  1.2293 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.2294 +                                 * tag token.
  1.2295 +                                 */
  1.2296 +                                addAttributeWithoutValue();
  1.2297 +                                state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
  1.2298 +                                if (shouldSuspend) {
  1.2299 +                                    break stateloop;
  1.2300 +                                }
  1.2301 +                                /*
  1.2302 +                                 * Switch to the data state.
  1.2303 +                                 */
  1.2304 +                                continue stateloop;
  1.2305 +                            case '\u0000':
  1.2306 +                                c = '\uFFFD';
  1.2307 +                                // fall thru
  1.2308 +                            case '\"':
  1.2309 +                            case '\'':
  1.2310 +                            case '<':
  1.2311 +                                errQuoteOrLtInAttributeNameOrNull(c);
  1.2312 +                                /*
  1.2313 +                                 * Treat it as per the "anything else" entry
  1.2314 +                                 * below.
  1.2315 +                                 */
  1.2316 +                            default:
  1.2317 +                                addAttributeWithoutValue();
  1.2318 +                                /*
  1.2319 +                                 * Anything else Start a new attribute in the
  1.2320 +                                 * current tag token.
  1.2321 +                                 */
  1.2322 +                                if (c >= 'A' && c <= 'Z') {
  1.2323 +                                    /*
  1.2324 +                                     * U+0041 LATIN CAPITAL LETTER A through to
  1.2325 +                                     * U+005A LATIN CAPITAL LETTER Z Set that
  1.2326 +                                     * attribute's name to the lowercase version
  1.2327 +                                     * of the current input character (add
  1.2328 +                                     * 0x0020 to the character's code point)
  1.2329 +                                     */
  1.2330 +                                    c += 0x20;
  1.2331 +                                }
  1.2332 +                                /*
  1.2333 +                                 * Set that attribute's name to the current
  1.2334 +                                 * input character,
  1.2335 +                                 */
  1.2336 +                                clearStrBufAndAppend(c);
  1.2337 +                                /*
  1.2338 +                                 * and its value to the empty string.
  1.2339 +                                 */
  1.2340 +                                // Will do later.
  1.2341 +                                /*
  1.2342 +                                 * Switch to the attribute name state.
  1.2343 +                                 */
  1.2344 +                                state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos);
  1.2345 +                                continue stateloop;
  1.2346 +                        }
  1.2347 +                    }
  1.2348 +                    // XXX reorder point
  1.2349 +                case MARKUP_DECLARATION_OPEN:
  1.2350 +                    markupdeclarationopenloop: for (;;) {
  1.2351 +                        if (++pos == endPos) {
  1.2352 +                            break stateloop;
  1.2353 +                        }
  1.2354 +                        c = checkChar(buf, pos);
  1.2355 +                        /*
  1.2356 +                         * If the next two characters are both U+002D
  1.2357 +                         * HYPHEN-MINUS characters (-), consume those two
  1.2358 +                         * characters, create a comment token whose data is the
  1.2359 +                         * empty string, and switch to the comment start state.
  1.2360 +                         * 
  1.2361 +                         * Otherwise, if the next seven characters are an ASCII
  1.2362 +                         * case-insensitive match for the word "DOCTYPE", then
  1.2363 +                         * consume those characters and switch to the DOCTYPE
  1.2364 +                         * state.
  1.2365 +                         * 
  1.2366 +                         * Otherwise, if the insertion mode is
  1.2367 +                         * "in foreign content" and the current node is not an
  1.2368 +                         * element in the HTML namespace and the next seven
  1.2369 +                         * characters are a case-sensitive match for the string
  1.2370 +                         * "[CDATA[" (the five uppercase letters "CDATA" with a
  1.2371 +                         * U+005B LEFT SQUARE BRACKET character before and
  1.2372 +                         * after), then consume those characters and switch to
  1.2373 +                         * the CDATA section state.
  1.2374 +                         * 
  1.2375 +                         * Otherwise, this is a parse error. Switch to the bogus
  1.2376 +                         * comment state. The next character that is consumed,
  1.2377 +                         * if any, is the first character that will be in the
  1.2378 +                         * comment.
  1.2379 +                         */
  1.2380 +                        switch (c) {
  1.2381 +                            case '-':
  1.2382 +                                clearLongStrBufAndAppend(c);
  1.2383 +                                state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
  1.2384 +                                break markupdeclarationopenloop;
  1.2385 +                            // continue stateloop;
  1.2386 +                            case 'd':
  1.2387 +                            case 'D':
  1.2388 +                                clearLongStrBufAndAppend(c);
  1.2389 +                                index = 0;
  1.2390 +                                state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
  1.2391 +                                continue stateloop;
  1.2392 +                            case '[':
  1.2393 +                                if (tokenHandler.cdataSectionAllowed()) {
  1.2394 +                                    clearLongStrBufAndAppend(c);
  1.2395 +                                    index = 0;
  1.2396 +                                    state = transition(state, Tokenizer.CDATA_START, reconsume, pos);
  1.2397 +                                    continue stateloop;
  1.2398 +                                }
  1.2399 +                                // else fall through
  1.2400 +                            default:
  1.2401 +                                errBogusComment();
  1.2402 +                                clearLongStrBuf();
  1.2403 +                                reconsume = true;
  1.2404 +                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.2405 +                                continue stateloop;
  1.2406 +                        }
  1.2407 +                    }
  1.2408 +                    // FALLTHRU DON'T REORDER
  1.2409 +                case MARKUP_DECLARATION_HYPHEN:
  1.2410 +                    markupdeclarationhyphenloop: for (;;) {
  1.2411 +                        if (++pos == endPos) {
  1.2412 +                            break stateloop;
  1.2413 +                        }
  1.2414 +                        c = checkChar(buf, pos);
  1.2415 +                        switch (c) {
  1.2416 +                            case '\u0000':
  1.2417 +                                break stateloop;
  1.2418 +                            case '-':
  1.2419 +                                clearLongStrBuf();
  1.2420 +                                state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
  1.2421 +                                break markupdeclarationhyphenloop;
  1.2422 +                            // continue stateloop;
  1.2423 +                            default:
  1.2424 +                                errBogusComment();
  1.2425 +                                reconsume = true;
  1.2426 +                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.2427 +                                continue stateloop;
  1.2428 +                        }
  1.2429 +                    }
  1.2430 +                    // FALLTHRU DON'T REORDER
  1.2431 +                case COMMENT_START:
  1.2432 +                    commentstartloop: for (;;) {
  1.2433 +                        if (++pos == endPos) {
  1.2434 +                            break stateloop;
  1.2435 +                        }
  1.2436 +                        c = checkChar(buf, pos);
  1.2437 +                        /*
  1.2438 +                         * Comment start state
  1.2439 +                         * 
  1.2440 +                         * 
  1.2441 +                         * Consume the next input character:
  1.2442 +                         */
  1.2443 +                        switch (c) {
  1.2444 +                            case '-':
  1.2445 +                                /*
  1.2446 +                                 * U+002D HYPHEN-MINUS (-) Switch to the comment
  1.2447 +                                 * start dash state.
  1.2448 +                                 */
  1.2449 +                                appendLongStrBuf(c);
  1.2450 +                                state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
  1.2451 +                                continue stateloop;
  1.2452 +                            case '>':
  1.2453 +                                /*
  1.2454 +                                 * U+003E GREATER-THAN SIGN (>) Parse error.
  1.2455 +                                 */
  1.2456 +                                errPrematureEndOfComment();
  1.2457 +                                /* Emit the comment token. */
  1.2458 +                                emitComment(0, pos);
  1.2459 +                                /*
  1.2460 +                                 * Switch to the data state.
  1.2461 +                                 */
  1.2462 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.2463 +                                continue stateloop;
  1.2464 +                            case '\r':
  1.2465 +                                appendLongStrBufCarriageReturn();
  1.2466 +                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2467 +                                break stateloop;
  1.2468 +                            case '\n':
  1.2469 +                                appendLongStrBufLineFeed();
  1.2470 +                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2471 +                                break commentstartloop;
  1.2472 +                            case '\u0000':
  1.2473 +                                c = '\uFFFD';
  1.2474 +                                // fall thru
  1.2475 +                            default:
  1.2476 +                                /*
  1.2477 +                                 * Anything else Append the input character to
  1.2478 +                                 * the comment token's data.
  1.2479 +                                 */
  1.2480 +                                appendLongStrBuf(c);
  1.2481 +                                /*
  1.2482 +                                 * Switch to the comment state.
  1.2483 +                                 */
  1.2484 +                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2485 +                                break commentstartloop;
  1.2486 +                            // continue stateloop;
  1.2487 +                        }
  1.2488 +                    }
  1.2489 +                    // FALLTHRU DON'T REORDER
  1.2490 +                case COMMENT:
  1.2491 +                    commentloop: for (;;) {
  1.2492 +                        if (++pos == endPos) {
  1.2493 +                            break stateloop;
  1.2494 +                        }
  1.2495 +                        c = checkChar(buf, pos);
  1.2496 +                        /*
  1.2497 +                         * Comment state Consume the next input character:
  1.2498 +                         */
  1.2499 +                        switch (c) {
  1.2500 +                            case '-':
  1.2501 +                                /*
  1.2502 +                                 * U+002D HYPHEN-MINUS (-) Switch to the comment
  1.2503 +                                 * end dash state
  1.2504 +                                 */
  1.2505 +                                appendLongStrBuf(c);
  1.2506 +                                state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
  1.2507 +                                break commentloop;
  1.2508 +                            // continue stateloop;
  1.2509 +                            case '\r':
  1.2510 +                                appendLongStrBufCarriageReturn();
  1.2511 +                                break stateloop;
  1.2512 +                            case '\n':
  1.2513 +                                appendLongStrBufLineFeed();
  1.2514 +                                continue;
  1.2515 +                            case '\u0000':
  1.2516 +                                c = '\uFFFD';
  1.2517 +                                // fall thru
  1.2518 +                            default:
  1.2519 +                                /*
  1.2520 +                                 * Anything else Append the input character to
  1.2521 +                                 * the comment token's data.
  1.2522 +                                 */
  1.2523 +                                appendLongStrBuf(c);
  1.2524 +                                /*
  1.2525 +                                 * Stay in the comment state.
  1.2526 +                                 */
  1.2527 +                                continue;
  1.2528 +                        }
  1.2529 +                    }
  1.2530 +                    // FALLTHRU DON'T REORDER
  1.2531 +                case COMMENT_END_DASH:
  1.2532 +                    commentenddashloop: for (;;) {
  1.2533 +                        if (++pos == endPos) {
  1.2534 +                            break stateloop;
  1.2535 +                        }
  1.2536 +                        c = checkChar(buf, pos);
  1.2537 +                        /*
  1.2538 +                         * Comment end dash state Consume the next input
  1.2539 +                         * character:
  1.2540 +                         */
  1.2541 +                        switch (c) {
  1.2542 +                            case '-':
  1.2543 +                                /*
  1.2544 +                                 * U+002D HYPHEN-MINUS (-) Switch to the comment
  1.2545 +                                 * end state
  1.2546 +                                 */
  1.2547 +                                appendLongStrBuf(c);
  1.2548 +                                state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
  1.2549 +                                break commentenddashloop;
  1.2550 +                            // continue stateloop;
  1.2551 +                            case '\r':
  1.2552 +                                appendLongStrBufCarriageReturn();
  1.2553 +                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2554 +                                break stateloop;
  1.2555 +                            case '\n':
  1.2556 +                                appendLongStrBufLineFeed();
  1.2557 +                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2558 +                                continue stateloop;
  1.2559 +                            case '\u0000':
  1.2560 +                                c = '\uFFFD';
  1.2561 +                                // fall thru
  1.2562 +                            default:
  1.2563 +                                /*
  1.2564 +                                 * Anything else Append a U+002D HYPHEN-MINUS
  1.2565 +                                 * (-) character and the input character to the
  1.2566 +                                 * comment token's data.
  1.2567 +                                 */
  1.2568 +                                appendLongStrBuf(c);
  1.2569 +                                /*
  1.2570 +                                 * Switch to the comment state.
  1.2571 +                                 */
  1.2572 +                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2573 +                                continue stateloop;
  1.2574 +                        }
  1.2575 +                    }
  1.2576 +                    // FALLTHRU DON'T REORDER
  1.2577 +                case COMMENT_END:
  1.2578 +                    commentendloop: for (;;) {
  1.2579 +                        if (++pos == endPos) {
  1.2580 +                            break stateloop;
  1.2581 +                        }
  1.2582 +                        c = checkChar(buf, pos);
  1.2583 +                        /*
  1.2584 +                         * Comment end state Consume the next input
  1.2585 +                         * character:
  1.2586 +                         */
  1.2587 +                        switch (c) {
  1.2588 +                            case '>':
  1.2589 +                                /*
  1.2590 +                                 * U+003E GREATER-THAN SIGN (>) Emit the comment
  1.2591 +                                 * token.
  1.2592 +                                 */
  1.2593 +                                emitComment(2, pos);
  1.2594 +                                /*
  1.2595 +                                 * Switch to the data state.
  1.2596 +                                 */
  1.2597 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.2598 +                                continue stateloop;
  1.2599 +                            case '-':
  1.2600 +                                /* U+002D HYPHEN-MINUS (-) Parse error. */
  1.2601 +                                /*
  1.2602 +                                 * Append a U+002D HYPHEN-MINUS (-) character to
  1.2603 +                                 * the comment token's data.
  1.2604 +                                 */
  1.2605 +                                adjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
  1.2606 +                                /*
  1.2607 +                                 * Stay in the comment end state.
  1.2608 +                                 */
  1.2609 +                                continue;
  1.2610 +                            case '\r':
  1.2611 +                                adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn();
  1.2612 +                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2613 +                                break stateloop;
  1.2614 +                            case '\n':
  1.2615 +                                adjustDoubleHyphenAndAppendToLongStrBufLineFeed();
  1.2616 +                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2617 +                                continue stateloop;
  1.2618 +                            case '!':
  1.2619 +                                errHyphenHyphenBang();
  1.2620 +                                appendLongStrBuf(c);
  1.2621 +                                state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
  1.2622 +                                continue stateloop;
  1.2623 +                            case '\u0000':
  1.2624 +                                c = '\uFFFD';
  1.2625 +                                // fall thru
  1.2626 +                            default:
  1.2627 +                                /*
  1.2628 +                                 * Append two U+002D HYPHEN-MINUS (-) characters
  1.2629 +                                 * and the input character to the comment
  1.2630 +                                 * token's data.
  1.2631 +                                 */
  1.2632 +                                adjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
  1.2633 +                                /*
  1.2634 +                                 * Switch to the comment state.
  1.2635 +                                 */
  1.2636 +                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2637 +                                continue stateloop;
  1.2638 +                        }
  1.2639 +                    }
  1.2640 +                    // XXX reorder point
  1.2641 +                case COMMENT_END_BANG:
  1.2642 +                    for (;;) {
  1.2643 +                        if (++pos == endPos) {
  1.2644 +                            break stateloop;
  1.2645 +                        }
  1.2646 +                        c = checkChar(buf, pos);
  1.2647 +                        /*
  1.2648 +                         * Comment end bang state
  1.2649 +                         * 
  1.2650 +                         * Consume the next input character:
  1.2651 +                         */
  1.2652 +                        switch (c) {
  1.2653 +                            case '>':
  1.2654 +                                /*
  1.2655 +                                 * U+003E GREATER-THAN SIGN (>) Emit the comment
  1.2656 +                                 * token.
  1.2657 +                                 */
  1.2658 +                                emitComment(3, pos);
  1.2659 +                                /*
  1.2660 +                                 * Switch to the data state.
  1.2661 +                                 */
  1.2662 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.2663 +                                continue stateloop;
  1.2664 +                            case '-':
  1.2665 +                                /*
  1.2666 +                                 * Append two U+002D HYPHEN-MINUS (-) characters
  1.2667 +                                 * and a U+0021 EXCLAMATION MARK (!) character
  1.2668 +                                 * to the comment token's data.
  1.2669 +                                 */
  1.2670 +                                appendLongStrBuf(c);
  1.2671 +                                /*
  1.2672 +                                 * Switch to the comment end dash state.
  1.2673 +                                 */
  1.2674 +                                state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
  1.2675 +                                continue stateloop;
  1.2676 +                            case '\r':
  1.2677 +                                appendLongStrBufCarriageReturn();
  1.2678 +                                break stateloop;
  1.2679 +                            case '\n':
  1.2680 +                                appendLongStrBufLineFeed();
  1.2681 +                                continue;
  1.2682 +                            case '\u0000':
  1.2683 +                                c = '\uFFFD';
  1.2684 +                                // fall thru
  1.2685 +                            default:
  1.2686 +                                /*
  1.2687 +                                 * Anything else Append two U+002D HYPHEN-MINUS
  1.2688 +                                 * (-) characters, a U+0021 EXCLAMATION MARK (!)
  1.2689 +                                 * character, and the input character to the
  1.2690 +                                 * comment token's data. Switch to the comment
  1.2691 +                                 * state.
  1.2692 +                                 */
  1.2693 +                                appendLongStrBuf(c);
  1.2694 +                                /*
  1.2695 +                                 * Switch to the comment state.
  1.2696 +                                 */
  1.2697 +                                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2698 +                                continue stateloop;
  1.2699 +                        }
  1.2700 +                    }
  1.2701 +                    // XXX reorder point
  1.2702 +                case COMMENT_START_DASH:
  1.2703 +                    if (++pos == endPos) {
  1.2704 +                        break stateloop;
  1.2705 +                    }
  1.2706 +                    c = checkChar(buf, pos);
  1.2707 +                    /*
  1.2708 +                     * Comment start dash state
  1.2709 +                     * 
  1.2710 +                     * Consume the next input character:
  1.2711 +                     */
  1.2712 +                    switch (c) {
  1.2713 +                        case '-':
  1.2714 +                            /*
  1.2715 +                             * U+002D HYPHEN-MINUS (-) Switch to the comment end
  1.2716 +                             * state
  1.2717 +                             */
  1.2718 +                            appendLongStrBuf(c);
  1.2719 +                            state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
  1.2720 +                            continue stateloop;
  1.2721 +                        case '>':
  1.2722 +                            errPrematureEndOfComment();
  1.2723 +                            /* Emit the comment token. */
  1.2724 +                            emitComment(1, pos);
  1.2725 +                            /*
  1.2726 +                             * Switch to the data state.
  1.2727 +                             */
  1.2728 +                            state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.2729 +                            continue stateloop;
  1.2730 +                        case '\r':
  1.2731 +                            appendLongStrBufCarriageReturn();
  1.2732 +                            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2733 +                            break stateloop;
  1.2734 +                        case '\n':
  1.2735 +                            appendLongStrBufLineFeed();
  1.2736 +                            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2737 +                            continue stateloop;
  1.2738 +                        case '\u0000':
  1.2739 +                            c = '\uFFFD';
  1.2740 +                            // fall thru
  1.2741 +                        default:
  1.2742 +                            /*
  1.2743 +                             * Append a U+002D HYPHEN-MINUS character (-) and
  1.2744 +                             * the current input character to the comment
  1.2745 +                             * token's data.
  1.2746 +                             */
  1.2747 +                            appendLongStrBuf(c);
  1.2748 +                            /*
  1.2749 +                             * Switch to the comment state.
  1.2750 +                             */
  1.2751 +                            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
  1.2752 +                            continue stateloop;
  1.2753 +                    }
  1.2754 +                    // XXX reorder point
  1.2755 +                case CDATA_START:
  1.2756 +                    for (;;) {
  1.2757 +                        if (++pos == endPos) {
  1.2758 +                            break stateloop;
  1.2759 +                        }
  1.2760 +                        c = checkChar(buf, pos);
  1.2761 +                        if (index < 6) { // CDATA_LSQB.length
  1.2762 +                            if (c == Tokenizer.CDATA_LSQB[index]) {
  1.2763 +                                appendLongStrBuf(c);
  1.2764 +                            } else {
  1.2765 +                                errBogusComment();
  1.2766 +                                reconsume = true;
  1.2767 +                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.2768 +                                continue stateloop;
  1.2769 +                            }
  1.2770 +                            index++;
  1.2771 +                            continue;
  1.2772 +                        } else {
  1.2773 +                            cstart = pos; // start coalescing
  1.2774 +                            reconsume = true;
  1.2775 +                            state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
  1.2776 +                            break; // FALL THROUGH continue stateloop;
  1.2777 +                        }
  1.2778 +                    }
  1.2779 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.2780 +                case CDATA_SECTION:
  1.2781 +                    cdatasectionloop: for (;;) {
  1.2782 +                        if (reconsume) {
  1.2783 +                            reconsume = false;
  1.2784 +                        } else {
  1.2785 +                            if (++pos == endPos) {
  1.2786 +                                break stateloop;
  1.2787 +                            }
  1.2788 +                            c = checkChar(buf, pos);
  1.2789 +                        }
  1.2790 +                        switch (c) {
  1.2791 +                            case ']':
  1.2792 +                                flushChars(buf, pos);
  1.2793 +                                state = transition(state, Tokenizer.CDATA_RSQB, reconsume, pos);
  1.2794 +                                break cdatasectionloop; // FALL THROUGH
  1.2795 +                            case '\u0000':
  1.2796 +                                emitReplacementCharacter(buf, pos);
  1.2797 +                                continue;
  1.2798 +                            case '\r':
  1.2799 +                                emitCarriageReturn(buf, pos);
  1.2800 +                                break stateloop;
  1.2801 +                            case '\n':
  1.2802 +                                silentLineFeed();
  1.2803 +                                // fall thru
  1.2804 +                            default:
  1.2805 +                                continue;
  1.2806 +                        }
  1.2807 +                    }
  1.2808 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.2809 +                case CDATA_RSQB:
  1.2810 +                    cdatarsqb: for (;;) {
  1.2811 +                        if (++pos == endPos) {
  1.2812 +                            break stateloop;
  1.2813 +                        }
  1.2814 +                        c = checkChar(buf, pos);
  1.2815 +                        switch (c) {
  1.2816 +                            case ']':
  1.2817 +                                state = transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos);
  1.2818 +                                break cdatarsqb;
  1.2819 +                            default:
  1.2820 +                                tokenHandler.characters(Tokenizer.RSQB_RSQB, 0,
  1.2821 +                                        1);
  1.2822 +                                cstart = pos;
  1.2823 +                                reconsume = true;
  1.2824 +                                state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
  1.2825 +                                continue stateloop;
  1.2826 +                        }
  1.2827 +                    }
  1.2828 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.2829 +                case CDATA_RSQB_RSQB:
  1.2830 +                    cdatarsqbrsqb: for (;;) {
  1.2831 +                        if (++pos == endPos) {
  1.2832 +                            break stateloop;
  1.2833 +                        }
  1.2834 +                        c = checkChar(buf, pos);
  1.2835 +                        switch (c) {
  1.2836 +                            case ']':
  1.2837 +                                // Saw a third ]. Emit one ] (logically the 
  1.2838 +                                // first one) and stay in this state to 
  1.2839 +                                // remember that the last two characters seen
  1.2840 +                                // have been ]].
  1.2841 +                                tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);                                
  1.2842 +                                continue;
  1.2843 +                            case '>':
  1.2844 +                                cstart = pos + 1;
  1.2845 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.2846 +                                continue stateloop;
  1.2847 +                            default:
  1.2848 +                                tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
  1.2849 +                                cstart = pos;
  1.2850 +                                reconsume = true;
  1.2851 +                                state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
  1.2852 +                                continue stateloop;
  1.2853 +                        }
  1.2854 +                    }
  1.2855 +                    // XXX reorder point
  1.2856 +                case ATTRIBUTE_VALUE_SINGLE_QUOTED:
  1.2857 +                    attributevaluesinglequotedloop: for (;;) {
  1.2858 +                        if (reconsume) {
  1.2859 +                            reconsume = false;
  1.2860 +                        } else {
  1.2861 +                            if (++pos == endPos) {
  1.2862 +                                break stateloop;
  1.2863 +                            }
  1.2864 +                            c = checkChar(buf, pos);
  1.2865 +                        }
  1.2866 +                        /*
  1.2867 +                         * Consume the next input character:
  1.2868 +                         */
  1.2869 +                        switch (c) {
  1.2870 +                            case '\'':
  1.2871 +                                /*
  1.2872 +                                 * U+0027 APOSTROPHE (') Switch to the after
  1.2873 +                                 * attribute value (quoted) state.
  1.2874 +                                 */
  1.2875 +                                addAttributeWithValue();
  1.2876 +
  1.2877 +                                state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
  1.2878 +                                continue stateloop;
  1.2879 +                            case '&':
  1.2880 +                                /*
  1.2881 +                                 * U+0026 AMPERSAND (&) Switch to the character
  1.2882 +                                 * reference in attribute value state, with the
  1.2883 +                                 * additional allowed character being U+0027
  1.2884 +                                 * APOSTROPHE (').
  1.2885 +                                 */
  1.2886 +                                clearStrBufAndAppend(c);
  1.2887 +                                setAdditionalAndRememberAmpersandLocation('\'');
  1.2888 +                                returnState = state;
  1.2889 +                                state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
  1.2890 +                                break attributevaluesinglequotedloop;
  1.2891 +                            // continue stateloop;
  1.2892 +                            case '\r':
  1.2893 +                                appendLongStrBufCarriageReturn();
  1.2894 +                                break stateloop;
  1.2895 +                            case '\n':
  1.2896 +                                appendLongStrBufLineFeed();
  1.2897 +                                continue;
  1.2898 +                            case '\u0000':
  1.2899 +                                c = '\uFFFD';
  1.2900 +                                // fall thru
  1.2901 +                            default:
  1.2902 +                                /*
  1.2903 +                                 * Anything else Append the current input
  1.2904 +                                 * character to the current attribute's value.
  1.2905 +                                 */
  1.2906 +                                appendLongStrBuf(c);
  1.2907 +                                /*
  1.2908 +                                 * Stay in the attribute value (single-quoted)
  1.2909 +                                 * state.
  1.2910 +                                 */
  1.2911 +                                continue;
  1.2912 +                        }
  1.2913 +                    }
  1.2914 +                    // FALLTHRU DON'T REORDER
  1.2915 +                case CONSUME_CHARACTER_REFERENCE:
  1.2916 +                    if (++pos == endPos) {
  1.2917 +                        break stateloop;
  1.2918 +                    }
  1.2919 +                    c = checkChar(buf, pos);
  1.2920 +                    if (c == '\u0000') {
  1.2921 +                        break stateloop;
  1.2922 +                    }
  1.2923 +                    /*
  1.2924 +                     * Unlike the definition in the spec, this state does not
  1.2925 +                     * return a value and never requires the caller to
  1.2926 +                     * backtrack. This state takes care of emitting characters
  1.2927 +                     * or appending to the current attribute value. It also
  1.2928 +                     * takes care of that in the case when consuming the
  1.2929 +                     * character reference fails.
  1.2930 +                     */
  1.2931 +                    /*
  1.2932 +                     * This section defines how to consume a character
  1.2933 +                     * reference. This definition is used when parsing character
  1.2934 +                     * references in text and in attributes.
  1.2935 +                     * 
  1.2936 +                     * The behavior depends on the identity of the next
  1.2937 +                     * character (the one immediately after the U+0026 AMPERSAND
  1.2938 +                     * character):
  1.2939 +                     */
  1.2940 +                    switch (c) {
  1.2941 +                        case ' ':
  1.2942 +                        case '\t':
  1.2943 +                        case '\n':
  1.2944 +                        case '\r': // we'll reconsume!
  1.2945 +                        case '\u000C':
  1.2946 +                        case '<':
  1.2947 +                        case '&':
  1.2948 +                            emitOrAppendStrBuf(returnState);
  1.2949 +                            if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.2950 +                                cstart = pos;
  1.2951 +                            }
  1.2952 +                            reconsume = true;
  1.2953 +                            state = transition(state, returnState, reconsume, pos);
  1.2954 +                            continue stateloop;
  1.2955 +                        case '#':
  1.2956 +                            /*
  1.2957 +                             * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
  1.2958 +                             * SIGN.
  1.2959 +                             */
  1.2960 +                            appendStrBuf('#');
  1.2961 +                            state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
  1.2962 +                            continue stateloop;
  1.2963 +                        default:
  1.2964 +                            if (c == additional) {
  1.2965 +                                emitOrAppendStrBuf(returnState);
  1.2966 +                                reconsume = true;
  1.2967 +                                state = transition(state, returnState, reconsume, pos);
  1.2968 +                                continue stateloop;
  1.2969 +                            }
  1.2970 +                            if (c >= 'a' && c <= 'z') {
  1.2971 +                                firstCharKey = c - 'a' + 26;
  1.2972 +                            } else if (c >= 'A' && c <= 'Z') {
  1.2973 +                                firstCharKey = c - 'A';
  1.2974 +                            } else {
  1.2975 +                                // No match
  1.2976 +                                /*
  1.2977 +                                 * If no match can be made, then this is a parse
  1.2978 +                                 * error.
  1.2979 +                                 */
  1.2980 +                                errNoNamedCharacterMatch();
  1.2981 +                                emitOrAppendStrBuf(returnState);
  1.2982 +                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.2983 +                                    cstart = pos;
  1.2984 +                                }
  1.2985 +                                reconsume = true;
  1.2986 +                                state = transition(state, returnState, reconsume, pos);
  1.2987 +                                continue stateloop;
  1.2988 +                            }
  1.2989 +                            // Didn't fail yet
  1.2990 +                            appendStrBuf(c);
  1.2991 +                            state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
  1.2992 +                            // FALL THROUGH continue stateloop;
  1.2993 +                    }
  1.2994 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.2995 +                case CHARACTER_REFERENCE_HILO_LOOKUP:
  1.2996 +                    {
  1.2997 +                        if (++pos == endPos) {
  1.2998 +                            break stateloop;
  1.2999 +                        }
  1.3000 +                        c = checkChar(buf, pos);
  1.3001 +                        if (c == '\u0000') {
  1.3002 +                            break stateloop;
  1.3003 +                        }
  1.3004 +                        /*
  1.3005 +                         * The data structure is as follows:
  1.3006 +                         * 
  1.3007 +                         * HILO_ACCEL is a two-dimensional int array whose major
  1.3008 +                         * index corresponds to the second character of the
  1.3009 +                         * character reference (code point as index) and the
  1.3010 +                         * minor index corresponds to the first character of the
  1.3011 +                         * character reference (packed so that A-Z runs from 0
  1.3012 +                         * to 25 and a-z runs from 26 to 51). This layout makes
  1.3013 +                         * it easier to use the sparseness of the data structure
  1.3014 +                         * to omit parts of it: The second dimension of the
  1.3015 +                         * table is null when no character reference starts with
  1.3016 +                         * the character corresponding to that row.
  1.3017 +                         * 
  1.3018 +                         * The int value HILO_ACCEL (by these indices) is zero
  1.3019 +                         * if there exists no character reference starting with
  1.3020 +                         * that two-letter prefix. Otherwise, the value is an
  1.3021 +                         * int that packs two shorts so that the higher short is
  1.3022 +                         * the index of the highest character reference name
  1.3023 +                         * with that prefix in NAMES and the lower short
  1.3024 +                         * corresponds to the index of the lowest character
  1.3025 +                         * reference name with that prefix. (It happens that the
  1.3026 +                         * first two character reference names share their
  1.3027 +                         * prefix so the packed int cannot be 0 by packing the
  1.3028 +                         * two shorts.)
  1.3029 +                         * 
  1.3030 +                         * NAMES is an array of byte arrays where each byte
  1.3031 +                         * array encodes the name of a character reference as
  1.3032 +                         * ASCII. The names omit the first two letters of the
  1.3033 +                         * name. (Since storing the first two letters would be
  1.3034 +                         * redundant with the data contained in HILO_ACCEL.) The
  1.3035 +                         * entries are lexically sorted.
  1.3036 +                         * 
  1.3037 +                         * For a given index in NAMES, the same index in VALUES
  1.3038 +                         * contains the corresponding expansion as an array of
  1.3039 +                         * two UTF-16 code units (either the character and
  1.3040 +                         * U+0000 or a surrogate pair).
  1.3041 +                         */
  1.3042 +                        int hilo = 0;
  1.3043 +                        if (c <= 'z') {
  1.3044 +                            @Const @NoLength int[] row = NamedCharactersAccel.HILO_ACCEL[c];
  1.3045 +                            if (row != null) {
  1.3046 +                                hilo = row[firstCharKey];
  1.3047 +                            }
  1.3048 +                        }
  1.3049 +                        if (hilo == 0) {
  1.3050 +                            /*
  1.3051 +                             * If no match can be made, then this is a parse
  1.3052 +                             * error.
  1.3053 +                             */
  1.3054 +                            errNoNamedCharacterMatch();
  1.3055 +                            emitOrAppendStrBuf(returnState);
  1.3056 +                            if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3057 +                                cstart = pos;
  1.3058 +                            }
  1.3059 +                            reconsume = true;
  1.3060 +                            state = transition(state, returnState, reconsume, pos);
  1.3061 +                            continue stateloop;
  1.3062 +                        }
  1.3063 +                        // Didn't fail yet
  1.3064 +                        appendStrBuf(c);
  1.3065 +                        lo = hilo & 0xFFFF;
  1.3066 +                        hi = hilo >> 16;
  1.3067 +                        entCol = -1;
  1.3068 +                        candidate = -1;
  1.3069 +                        strBufMark = 0;
  1.3070 +                        state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
  1.3071 +                        // FALL THROUGH continue stateloop;
  1.3072 +                    }
  1.3073 +                case CHARACTER_REFERENCE_TAIL: // narrows the [lo, hi] window into NamedCharacters.NAMES one input character at a time
  1.3074 +                    outer: for (;;) {
  1.3075 +                        if (++pos == endPos) {
  1.3076 +                            break stateloop;
  1.3077 +                        }
  1.3078 +                        c = checkChar(buf, pos);
  1.3079 +                        if (c == '\u0000') {
  1.3080 +                            break stateloop;
  1.3081 +                        }
  1.3082 +                        entCol++; // column within the names that the current character is compared against
  1.3083 +                        /*
  1.3084 +                         * Consume the maximum number of characters possible,
  1.3085 +                         * with the consumed characters matching one of the
  1.3086 +                         * identifiers in the first column of the named
  1.3087 +                         * character references table (in a case-sensitive
  1.3088 +                         * manner).
  1.3089 +                         */
  1.3090 +                        loloop: for (;;) { // advance lo past names that end at entCol or whose character at entCol is below c
  1.3091 +                            if (hi < lo) {
  1.3092 +                                break outer;
  1.3093 +                            }
  1.3094 +                            if (entCol == NamedCharacters.NAMES[lo].length()) {
  1.3095 +                                candidate = lo; // the name at lo has no character left at entCol: remember it as the match so far
  1.3096 +                                strBufMark = strBufLen; // remember how much of strBuf that match covers
  1.3097 +                                lo++;
  1.3098 +                            } else if (entCol > NamedCharacters.NAMES[lo].length()) {
  1.3099 +                                break outer;
  1.3100 +                            } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
  1.3101 +                                lo++;
  1.3102 +                            } else {
  1.3103 +                                break loloop;
  1.3104 +                            }
  1.3105 +                        }
  1.3106 +
  1.3107 +                        hiloop: for (;;) { // retreat hi past names whose character at entCol is above c
  1.3108 +                            if (hi < lo) {
  1.3109 +                                break outer;
  1.3110 +                            }
  1.3111 +                            if (entCol == NamedCharacters.NAMES[hi].length()) {
  1.3112 +                                break hiloop;
  1.3113 +                            }
  1.3114 +                            if (entCol > NamedCharacters.NAMES[hi].length()) {
  1.3115 +                                break outer;
  1.3116 +                            } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
  1.3117 +                                hi--;
  1.3118 +                            } else {
  1.3119 +                                break hiloop;
  1.3120 +                            }
  1.3121 +                        }
  1.3122 +
  1.3123 +                        if (c == ';') {
  1.3124 +                            // If we see a semicolon, there cannot be a
  1.3125 +                            // longer match. Break the loop. However, before
  1.3126 +                            // breaking, take the longest match so far as the
  1.3127 +                            // candidate, if we are just about to complete a
  1.3128 +                            // match.
  1.3129 +                            if (entCol + 1 == NamedCharacters.NAMES[lo].length()) {
  1.3130 +                                candidate = lo;
  1.3131 +                                strBufMark = strBufLen;
  1.3132 +                            }                            
  1.3133 +                            break outer;
  1.3134 +                        }
  1.3135 +                        
  1.3136 +                        if (hi < lo) {
  1.3137 +                            break outer;
  1.3138 +                        }
  1.3139 +                        appendStrBuf(c); // the window is still non-empty: buffer the character and keep matching
  1.3140 +                        continue;
  1.3141 +                    }
  1.3142 +
  1.3143 +                    if (candidate == -1) { // no complete name was recorded while matching
  1.3144 +                        // reconsume deals with CR, LF or nul
  1.3145 +                        /*
  1.3146 +                         * If no match can be made, then this is a parse error.
  1.3147 +                         */
  1.3148 +                        errNoNamedCharacterMatch();
  1.3149 +                        emitOrAppendStrBuf(returnState);
  1.3150 +                        if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3151 +                            cstart = pos;
  1.3152 +                        }
  1.3153 +                        reconsume = true;
  1.3154 +                        state = transition(state, returnState, reconsume, pos);
  1.3155 +                        continue stateloop;
  1.3156 +                    } else {
  1.3157 +                        // c can't be CR, LF or nul if we got here
  1.3158 +                        @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
  1.3159 +                        if (candidateName.length() == 0
  1.3160 +                                || candidateName.charAt(candidateName.length() - 1) != ';') {
  1.3161 +                            /*
  1.3162 +                             * If the last character matched is not a U+003B
  1.3163 +                             * SEMICOLON (;), there is a parse error.
  1.3164 +                             */
  1.3165 +                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
  1.3166 +                                /*
  1.3167 +                                 * If the entity is being consumed as part of an
  1.3168 +                                 * attribute, and the last character matched is
  1.3169 +                                 * not a U+003B SEMICOLON (;),
  1.3170 +                                 */
  1.3171 +                                char ch; // the character that immediately follows the matched name
  1.3172 +                                if (strBufMark == strBufLen) {
  1.3173 +                                    ch = c; // nothing was buffered past the match, so it is the current character
  1.3174 +                                } else {
  1.3175 +                                    // if (strBufOffset != -1) {
  1.3176 +                                    // ch = buf[strBufOffset + strBufMark];
  1.3177 +                                    // } else {
  1.3178 +                                    ch = strBuf[strBufMark];
  1.3179 +                                    // }
  1.3180 +                                }
  1.3181 +                                if (ch == '=' || (ch >= '0' && ch <= '9')
  1.3182 +                                        || (ch >= 'A' && ch <= 'Z')
  1.3183 +                                        || (ch >= 'a' && ch <= 'z')) {
  1.3184 +                                    /*
  1.3185 +                                     * and the next character is either a U+003D
  1.3186 +                                     * EQUALS SIGN character (=) or in the range
  1.3187 +                                     * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
  1.3188 +                                     * U+0041 LATIN CAPITAL LETTER A to U+005A
  1.3189 +                                     * LATIN CAPITAL LETTER Z, or U+0061 LATIN
  1.3190 +                                     * SMALL LETTER A to U+007A LATIN SMALL
  1.3191 +                                     * LETTER Z, then, for historical reasons,
  1.3192 +                                     * all the characters that were matched
  1.3193 +                                     * after the U+0026 AMPERSAND (&) must be
  1.3194 +                                     * unconsumed, and nothing is returned.
  1.3195 +                                     */
  1.3196 +                                    errNoNamedCharacterMatch();
  1.3197 +                                    appendStrBufToLongStrBuf();
  1.3198 +                                    reconsume = true;
  1.3199 +                                    state = transition(state, returnState, reconsume, pos);
  1.3200 +                                    continue stateloop;
  1.3201 +                                }
  1.3202 +                            }
  1.3203 +                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
  1.3204 +                                errUnescapedAmpersandInterpretedAsCharacterReference();
  1.3205 +                            } else {
  1.3206 +                                errNotSemicolonTerminated();
  1.3207 +                            }
  1.3208 +                        }
  1.3209 +
  1.3210 +                        /*
  1.3211 +                         * Otherwise, return a character token for the character
  1.3212 +                         * corresponding to the entity name (as given by the
  1.3213 +                         * second column of the named character references
  1.3214 +                         * table).
  1.3215 +                         */
  1.3216 +                        // CPPONLY: completedNamedCharacterReference();
  1.3217 +                        @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
  1.3218 +                        if (
  1.3219 +                        // [NOCPP[
  1.3220 +                        val.length == 1
  1.3221 +                        // ]NOCPP]
  1.3222 +                        // CPPONLY: val[1] == 0
  1.3223 +                        ) {
  1.3224 +                            emitOrAppendOne(val, returnState);
  1.3225 +                        } else {
  1.3226 +                            emitOrAppendTwo(val, returnState);
  1.3227 +                        }
  1.3228 +                        // Pass on, unchanged, whatever was buffered after the matched name (strBuf[strBufMark] up to strBufLen): appended to the long string buffer when the mask test below is non-zero, otherwise reported through tokenHandler.characters().
  1.3229 +                        if (strBufMark < strBufLen) {
  1.3230 +                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
  1.3231 +                                for (int i = strBufMark; i < strBufLen; i++) {
  1.3232 +                                    appendLongStrBuf(strBuf[i]);
  1.3233 +                                }
  1.3234 +                            } else {
  1.3235 +                                tokenHandler.characters(strBuf, strBufMark,
  1.3236 +                                        strBufLen - strBufMark);
  1.3237 +                            }
  1.3238 +                        }
  1.3239 +                        // Check if we broke out early with c being the last
  1.3240 +                        // character that matched as opposed to being the
  1.3241 +                        // first one that didn't match. In the case of an
  1.3242 +                        // early break, the next run on text should start
  1.3243 +                        // *after* the current character and the current
  1.3244 +                        // character shouldn't be reconsumed.
  1.3245 +                        boolean earlyBreak = (c == ';' && strBufMark == strBufLen);
  1.3246 +                        if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3247 +                            cstart = earlyBreak ? pos + 1 : pos;
  1.3248 +                        }
  1.3249 +                        reconsume = !earlyBreak;
  1.3250 +                        state = transition(state, returnState, reconsume, pos);
  1.3251 +                        continue stateloop;
  1.3252 +                        /*
  1.3253 +                         * If the markup contains I'm &notit; I tell you, the
  1.3254 +                         * entity is parsed as "not", as in, I'm ¬it; I tell
  1.3255 +                         * you. But if the markup was I'm &notin; I tell you,
  1.3256 +                         * the entity would be parsed as "notin;", resulting in
  1.3257 +                         * I'm ∉ I tell you.
  1.3258 +                         */
  1.3259 +                    }
  1.3260 +                    // XXX reorder point
  1.3261 +                case CONSUME_NCR: // looks at the character after the number sign to choose hexadecimal or decimal parsing
  1.3262 +                    if (++pos == endPos) {
  1.3263 +                        break stateloop;
  1.3264 +                    }
  1.3265 +                    c = checkChar(buf, pos);
  1.3266 +                    prevValue = -1; // starts below value so the "value < prevValue" overflow test in the digit loops is false at first
  1.3267 +                    value = 0; // accumulator for the numeric reference
  1.3268 +                    seenDigits = false; // set by the digit loops once at least one digit was consumed
  1.3269 +                    /*
  1.3270 +                     * The behavior further depends on the character after the
  1.3271 +                     * U+0023 NUMBER SIGN:
  1.3272 +                     */
  1.3273 +                    switch (c) {
  1.3274 +                        case 'x':
  1.3275 +                        case 'X':
  1.3276 +
  1.3277 +                            /*
  1.3278 +                             * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
  1.3279 +                             * LETTER X Consume the X.
  1.3280 +                             *
  1.3281 +                             * Follow the steps below, but using the range of
  1.3282 +                             * characters U+0030 DIGIT ZERO through to U+0039
  1.3283 +                             * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
  1.3284 +                             * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
  1.3285 +                             * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
  1.3286 +                             * LETTER F (in other words, 0-9, A-F, a-f).
  1.3287 +                             *
  1.3288 +                             * When it comes to interpreting the number,
  1.3289 +                             * interpret it as a hexadecimal number.
  1.3290 +                             */
  1.3291 +                            appendStrBuf(c); // keep the x/X in strBuf alongside what was buffered before it
  1.3292 +                            state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos);
  1.3293 +                            continue stateloop;
  1.3294 +                        default:
  1.3295 +                            /*
  1.3296 +                             * Anything else Follow the steps below, but using
  1.3297 +                             * the range of characters U+0030 DIGIT ZERO through
  1.3298 +                             * to U+0039 DIGIT NINE (i.e. just 0-9).
  1.3299 +                             *
  1.3300 +                             * When it comes to interpreting the number,
  1.3301 +                             * interpret it as a decimal number.
  1.3302 +                             */
  1.3303 +                            reconsume = true; // the decimal loop must examine this same character
  1.3304 +                            state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos);
  1.3305 +                            // FALL THROUGH continue stateloop;
  1.3306 +                    }
  1.3307 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.3308 +                case DECIMAL_NRC_LOOP: // accumulates decimal digits into value until ';' or a non-digit is seen
  1.3309 +                    decimalloop: for (;;) {
  1.3310 +                        if (reconsume) { // c already holds the character to examine
  1.3311 +                            reconsume = false;
  1.3312 +                        } else {
  1.3313 +                            if (++pos == endPos) {
  1.3314 +                                break stateloop;
  1.3315 +                            }
  1.3316 +                            c = checkChar(buf, pos);
  1.3317 +                        }
  1.3318 +                        // Deal with overflow gracefully: value dropping below its previous reading is taken as int wrap-around
  1.3319 +                        if (value < prevValue) {
  1.3320 +                            value = 0x110000; // Value above Unicode range but
  1.3321 +                            // within int
  1.3322 +                            // range
  1.3323 +                        }
  1.3324 +                        prevValue = value;
  1.3325 +                        /*
  1.3326 +                         * Consume as many characters as match the range of
  1.3327 +                         * characters given above.
  1.3328 +                         */
  1.3329 +                        if (c >= '0' && c <= '9') {
  1.3330 +                            seenDigits = true;
  1.3331 +                            value *= 10;
  1.3332 +                            value += c - '0';
  1.3333 +                            continue;
  1.3334 +                        } else if (c == ';') {
  1.3335 +                            if (seenDigits) {
  1.3336 +                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3337 +                                    cstart = pos + 1; // the ';' is consumed, so cstart moves past it
  1.3338 +                                }
  1.3339 +                                state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
  1.3340 +                                // FALL THROUGH continue stateloop;
  1.3341 +                                break decimalloop; // leaves the loop and falls into the HANDLE_NCR_VALUE case that follows
  1.3342 +                            } else {
  1.3343 +                                errNoDigitsInNCR();
  1.3344 +                                appendStrBuf(';');
  1.3345 +                                emitOrAppendStrBuf(returnState);
  1.3346 +                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3347 +                                    cstart = pos + 1;
  1.3348 +                                }
  1.3349 +                                state = transition(state, returnState, reconsume, pos);
  1.3350 +                                continue stateloop;
  1.3351 +                            }
  1.3352 +                        } else {
  1.3353 +                            /*
  1.3354 +                             * If no characters match the range, then don't
  1.3355 +                             * consume any characters (and unconsume the U+0023
  1.3356 +                             * NUMBER SIGN character and, if appropriate, the X
  1.3357 +                             * character). This is a parse error; nothing is
  1.3358 +                             * returned.
  1.3359 +                             *
  1.3360 +                             * Otherwise, if the next character is a U+003B
  1.3361 +                             * SEMICOLON, consume that too. If it isn't, there
  1.3362 +                             * is a parse error.
  1.3363 +                             */
  1.3364 +                            if (!seenDigits) {
  1.3365 +                                errNoDigitsInNCR();
  1.3366 +                                emitOrAppendStrBuf(returnState);
  1.3367 +                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3368 +                                    cstart = pos;
  1.3369 +                                }
  1.3370 +                                reconsume = true; // the return state must see this character again
  1.3371 +                                state = transition(state, returnState, reconsume, pos);
  1.3372 +                                continue stateloop;
  1.3373 +                            } else {
  1.3374 +                                errCharRefLacksSemicolon();
  1.3375 +                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3376 +                                    cstart = pos; // the terminating character is not part of the reference
  1.3377 +                                }
  1.3378 +                                reconsume = true;
  1.3379 +                                state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
  1.3380 +                                // FALL THROUGH continue stateloop;
  1.3381 +                                break decimalloop; // leaves the loop and falls into the HANDLE_NCR_VALUE case that follows
  1.3382 +                            }
  1.3383 +                        }
  1.3384 +                    }
  1.3385 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.3386 +                case HANDLE_NCR_VALUE: // reached by fall-through from DECIMAL_NRC_LOOP or by a transition from HEX_NCR_LOOP
  1.3387 +                    // WARNING previous state sets reconsume
  1.3388 +                    // XXX inline this case if the method size can take it
  1.3389 +                    handleNcrValue(returnState); // defined outside this chunk; presumably emits or appends the character for the accumulated value - confirm
  1.3390 +                    state = transition(state, returnState, reconsume, pos);
  1.3391 +                    continue stateloop;
  1.3392 +                    // XXX reorder point
  1.3393 +                case HEX_NCR_LOOP: // accumulates hexadecimal digits (0-9, A-F, a-f) into value until ';' or another character is seen
  1.3394 +                    for (;;) {
  1.3395 +                        if (++pos == endPos) {
  1.3396 +                            break stateloop;
  1.3397 +                        }
  1.3398 +                        c = checkChar(buf, pos);
  1.3399 +                        // Deal with overflow gracefully: value dropping below its previous reading is taken as int wrap-around
  1.3400 +                        if (value < prevValue) {
  1.3401 +                            value = 0x110000; // Value above Unicode range but
  1.3402 +                            // within int
  1.3403 +                            // range
  1.3404 +                        }
  1.3405 +                        prevValue = value;
  1.3406 +                        /*
  1.3407 +                         * Consume as many characters as match the range of
  1.3408 +                         * characters given above.
  1.3409 +                         */
  1.3410 +                        if (c >= '0' && c <= '9') {
  1.3411 +                            seenDigits = true;
  1.3412 +                            value *= 16;
  1.3413 +                            value += c - '0';
  1.3414 +                            continue;
  1.3415 +                        } else if (c >= 'A' && c <= 'F') {
  1.3416 +                            seenDigits = true;
  1.3417 +                            value *= 16;
  1.3418 +                            value += c - 'A' + 10;
  1.3419 +                            continue;
  1.3420 +                        } else if (c >= 'a' && c <= 'f') {
  1.3421 +                            seenDigits = true;
  1.3422 +                            value *= 16;
  1.3423 +                            value += c - 'a' + 10;
  1.3424 +                            continue;
  1.3425 +                        } else if (c == ';') {
  1.3426 +                            if (seenDigits) {
  1.3427 +                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3428 +                                    cstart = pos + 1; // the ';' is consumed, so cstart moves past it
  1.3429 +                                }
  1.3430 +                                state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
  1.3431 +                                continue stateloop; // HANDLE_NCR_VALUE precedes this case, so it cannot be reached by falling through
  1.3432 +                            } else {
  1.3433 +                                errNoDigitsInNCR();
  1.3434 +                                appendStrBuf(';');
  1.3435 +                                emitOrAppendStrBuf(returnState);
  1.3436 +                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3437 +                                    cstart = pos + 1;
  1.3438 +                                }
  1.3439 +                                state = transition(state, returnState, reconsume, pos);
  1.3440 +                                continue stateloop;
  1.3441 +                            }
  1.3442 +                        } else {
  1.3443 +                            /*
  1.3444 +                             * If no characters match the range, then don't
  1.3445 +                             * consume any characters (and unconsume the U+0023
  1.3446 +                             * NUMBER SIGN character and, if appropriate, the X
  1.3447 +                             * character). This is a parse error; nothing is
  1.3448 +                             * returned.
  1.3449 +                             *
  1.3450 +                             * Otherwise, if the next character is a U+003B
  1.3451 +                             * SEMICOLON, consume that too. If it isn't, there
  1.3452 +                             * is a parse error.
  1.3453 +                             */
  1.3454 +                            if (!seenDigits) {
  1.3455 +                                errNoDigitsInNCR();
  1.3456 +                                emitOrAppendStrBuf(returnState);
  1.3457 +                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3458 +                                    cstart = pos;
  1.3459 +                                }
  1.3460 +                                reconsume = true; // the return state must see this character again
  1.3461 +                                state = transition(state, returnState, reconsume, pos);
  1.3462 +                                continue stateloop;
  1.3463 +                            } else {
  1.3464 +                                errCharRefLacksSemicolon();
  1.3465 +                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
  1.3466 +                                    cstart = pos; // the terminating character is not part of the reference
  1.3467 +                                }
  1.3468 +                                reconsume = true;
  1.3469 +                                state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
  1.3470 +                                continue stateloop;
  1.3471 +                            }
  1.3472 +                        }
  1.3473 +                    }
  1.3474 +                    // XXX reorder point
  1.3475 +                case PLAINTEXT: // no character leads to another state here; only NUL, CR and LF get special handling
  1.3476 +                    plaintextloop: for (;;) {
  1.3477 +                        if (reconsume) { // c already holds the character to examine
  1.3478 +                            reconsume = false;
  1.3479 +                        } else {
  1.3480 +                            if (++pos == endPos) {
  1.3481 +                                break stateloop;
  1.3482 +                            }
  1.3483 +                            c = checkChar(buf, pos);
  1.3484 +                        }
  1.3485 +                        switch (c) {
  1.3486 +                            case '\u0000':
  1.3487 +                                emitPlaintextReplacementCharacter(buf, pos);
  1.3488 +                                continue;
  1.3489 +                            case '\r':
  1.3490 +                                emitCarriageReturn(buf, pos);
  1.3491 +                                break stateloop; // a CR leaves the state loop; state itself is unchanged
  1.3492 +                            case '\n':
  1.3493 +                                silentLineFeed(); // then falls through to default
  1.3494 +                            default:
  1.3495 +                                /*
  1.3496 +                                 * Anything else Emit the current input
  1.3497 +                                 * character as a character token. Stay in the
  1.3498 +                                 * PLAINTEXT state.
  1.3499 +                                 */
  1.3500 +                                continue;
  1.3501 +                        }
  1.3502 +                    }
  1.3503 +                    // XXX reorder point
  1.3504 +                case CLOSE_TAG_OPEN: // the character examined here decides between an end tag name, an empty "</>" and a bogus comment
  1.3505 +                    if (++pos == endPos) {
  1.3506 +                        break stateloop;
  1.3507 +                    }
  1.3508 +                    c = checkChar(buf, pos);
  1.3509 +                    /*
  1.3510 +                     * Otherwise, if the content model flag is set to the PCDATA
  1.3511 +                     * state, or if the next few characters do match that tag
  1.3512 +                     * name, consume the next input character:
  1.3513 +                     */
  1.3514 +                    switch (c) {
  1.3515 +                        case '>':
  1.3516 +                            /* U+003E GREATER-THAN SIGN (>) Parse error. */
  1.3517 +                            errLtSlashGt();
  1.3518 +                            /*
  1.3519 +                             * Switch to the data state.
  1.3520 +                             */
  1.3521 +                            cstart = pos + 1; // cstart moves just past the '>'
  1.3522 +                            state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.3523 +                            continue stateloop;
  1.3524 +                        case '\r':
  1.3525 +                            silentCarriageReturn();
  1.3526 +                            /* Anything else Parse error. */
  1.3527 +                            errGarbageAfterLtSlash();
  1.3528 +                            /*
  1.3529 +                             * Switch to the bogus comment state.
  1.3530 +                             */
  1.3531 +                            clearLongStrBufAndAppend('\n'); // the CR is stored as LF in the long string buffer
  1.3532 +                            state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.3533 +                            break stateloop; // unlike the '\n' case below, a CR leaves the state loop
  1.3534 +                        case '\n':
  1.3535 +                            silentLineFeed();
  1.3536 +                            /* Anything else Parse error. */
  1.3537 +                            errGarbageAfterLtSlash();
  1.3538 +                            /*
  1.3539 +                             * Switch to the bogus comment state.
  1.3540 +                             */
  1.3541 +                            clearLongStrBufAndAppend('\n');
  1.3542 +                            state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.3543 +                            continue stateloop;
  1.3544 +                        case '\u0000':
  1.3545 +                            c = '\uFFFD'; // NUL is replaced by U+FFFD before it reaches the bogus comment branch below
  1.3546 +                            // fall thru
  1.3547 +                        default:
  1.3548 +                            if (c >= 'A' && c <= 'Z') {
  1.3549 +                                c += 0x20; // fold ASCII upper case to lower case
  1.3550 +                            }
  1.3551 +                            if (c >= 'a' && c <= 'z') {
  1.3552 +                                /*
  1.3553 +                                 * U+0061 LATIN SMALL LETTER A through to U+007A
  1.3554 +                                 * LATIN SMALL LETTER Z Create a new end tag
  1.3555 +                                 * token,
  1.3556 +                                 */
  1.3557 +                                endTag = true;
  1.3558 +                                /*
  1.3559 +                                 * set its tag name to the input character,
  1.3560 +                                 */
  1.3561 +                                clearStrBufAndAppend(c);
  1.3562 +                                /*
  1.3563 +                                 * then switch to the tag name state. (Don't
  1.3564 +                                 * emit the token yet; further details will be
  1.3565 +                                 * filled in before it is emitted.)
  1.3566 +                                 */
  1.3567 +                                state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
  1.3568 +                                continue stateloop;
  1.3569 +                            } else {
  1.3570 +                                /* Anything else Parse error. */
  1.3571 +                                errGarbageAfterLtSlash();
  1.3572 +                                /*
  1.3573 +                                 * Switch to the bogus comment state.
  1.3574 +                                 */
  1.3575 +                                clearLongStrBufAndAppend(c); // the offending character becomes the first character of the long string buffer
  1.3576 +                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.3577 +                                continue stateloop;
  1.3578 +                            }
  1.3579 +                    }
  1.3580 +                    // XXX reorder point
  1.3581 +                case RCDATA: // only '&' and '<' lead to another state; everything else stays in this loop or leaves stateloop
  1.3582 +                    rcdataloop: for (;;) {
  1.3583 +                        if (reconsume) { // c already holds the character to examine
  1.3584 +                            reconsume = false;
  1.3585 +                        } else {
  1.3586 +                            if (++pos == endPos) {
  1.3587 +                                break stateloop;
  1.3588 +                            }
  1.3589 +                            c = checkChar(buf, pos);
  1.3590 +                        }
  1.3591 +                        switch (c) {
  1.3592 +                            case '&':
  1.3593 +                                /*
  1.3594 +                                 * U+0026 AMPERSAND (&) Switch to the character
  1.3595 +                                 * reference in RCDATA state.
  1.3596 +                                 */
  1.3597 +                                flushChars(buf, pos);
  1.3598 +                                clearStrBufAndAppend(c); // strBuf now starts with the '&'
  1.3599 +                                additional = '\u0000';
  1.3600 +                                returnState = state; // the character reference states transition back to returnState
  1.3601 +                                state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
  1.3602 +                                continue stateloop;
  1.3603 +                            case '<':
  1.3604 +                                /*
  1.3605 +                                 * U+003C LESS-THAN SIGN (<) Switch to the
  1.3606 +                                 * RCDATA less-than sign state.
  1.3607 +                                 */
  1.3608 +                                flushChars(buf, pos);
  1.3609 +
  1.3610 +                                returnState = state;
  1.3611 +                                state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
  1.3612 +                                continue stateloop;
  1.3613 +                            case '\u0000':
  1.3614 +                                emitReplacementCharacter(buf, pos);
  1.3615 +                                continue;
  1.3616 +                            case '\r':
  1.3617 +                                emitCarriageReturn(buf, pos);
  1.3618 +                                break stateloop; // a CR leaves the state loop; state itself is unchanged
  1.3619 +                            case '\n':
  1.3620 +                                silentLineFeed(); // then falls through to default
  1.3621 +                            default:
  1.3622 +                                /*
  1.3623 +                                 * Emit the current input character as a
  1.3624 +                                 * character token. Stay in the RCDATA state.
  1.3625 +                                 */
  1.3626 +                                continue;
  1.3627 +                        }
  1.3628 +                    }
  1.3629 +                    // XXX reorder point
  1.3630 +                case RAWTEXT:
  1.3631 +                    rawtextloop: for (;;) {
  1.3632 +                        if (reconsume) {
  1.3633 +                            reconsume = false;
  1.3634 +                        } else {
  1.3635 +                            if (++pos == endPos) {
  1.3636 +                                break stateloop;
  1.3637 +                            }
  1.3638 +                            c = checkChar(buf, pos);
  1.3639 +                        }
  1.3640 +                        switch (c) {
  1.3641 +                            case '<':
  1.3642 +                                /*
  1.3643 +                                 * U+003C LESS-THAN SIGN (<) Switch to the
  1.3644 +                                 * RAWTEXT less-than sign state.
  1.3645 +                                 */
  1.3646 +                                flushChars(buf, pos);
  1.3647 +
  1.3648 +                                returnState = state;
  1.3649 +                                state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
  1.3650 +                                break rawtextloop;
  1.3651 +                            // FALL THRU continue stateloop;
  1.3652 +                            case '\u0000':
  1.3653 +                                emitReplacementCharacter(buf, pos);
  1.3654 +                                continue;
  1.3655 +                            case '\r':
  1.3656 +                                emitCarriageReturn(buf, pos);
  1.3657 +                                break stateloop;
  1.3658 +                            case '\n':
  1.3659 +                                silentLineFeed();
  1.3660 +                            default:
  1.3661 +                                /*
  1.3662 +                                 * Emit the current input character as a
  1.3663 +                                 * character token. Stay in the RAWTEXT state.
  1.3664 +                                 */
  1.3665 +                                continue;
  1.3666 +                        }
  1.3667 +                    }
  1.3668 +                    // XXX fallthru don't reorder
  1.3669 +                case RAWTEXT_RCDATA_LESS_THAN_SIGN:
  1.3670 +                    rawtextrcdatalessthansignloop: for (;;) {
  1.3671 +                        if (++pos == endPos) {
  1.3672 +                            break stateloop;
  1.3673 +                        }
  1.3674 +                        c = checkChar(buf, pos);
  1.3675 +                        switch (c) {
  1.3676 +                            case '/':
  1.3677 +                                /*
  1.3678 +                                 * U+002F SOLIDUS (/) Set the temporary buffer
  1.3679 +                                 * to the empty string. Switch to the RCDATA or
  1.3680 +                                 * RAWTEXT end tag open state.
  1.3681 +                                 */
  1.3682 +                                index = 0;
  1.3683 +                                clearStrBuf();
  1.3684 +                                state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
  1.3685 +                                break rawtextrcdatalessthansignloop;
  1.3686 +                            // FALL THRU continue stateloop;
  1.3687 +                            default:
  1.3688 +                                /*
  1.3689 +                                 * Otherwise, emit a U+003C LESS-THAN SIGN
  1.3690 +                                 * character token
  1.3691 +                                 */
  1.3692 +                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
  1.3693 +                                /*
  1.3694 +                                 * and reconsume the current input character in
  1.3695 +                                 * the RCDATA or RAWTEXT state (returnState).
  1.3696 +                                 */
  1.3697 +                                cstart = pos;
  1.3698 +                                reconsume = true;
  1.3699 +                                state = transition(state, returnState, reconsume, pos);
  1.3700 +                                continue stateloop;
  1.3701 +                        }
  1.3702 +                    }
  1.3703 +                    // XXX fall thru. don't reorder.
  1.3704 +                case NON_DATA_END_TAG_NAME:
  1.3705 +                    for (;;) {
  1.3706 +                        if (++pos == endPos) {
  1.3707 +                            break stateloop;
  1.3708 +                        }
  1.3709 +                        c = checkChar(buf, pos);
  1.3710 +                        /*
  1.3711 +                         * ASSERT! when entering this state, set index to 0 and
  1.3712 +                         * call clearStrBuf() assert (contentModelElement !=
  1.3713 +                         * null); Let's implement the above without lookahead.
  1.3714 +                         * strBuf is the 'temporary buffer'.
  1.3715 +                         */
  1.3716 +                        if (index < endTagExpectationAsArray.length) {
  1.3717 +                            char e = endTagExpectationAsArray[index];
  1.3718 +                            char folded = c;
  1.3719 +                            if (c >= 'A' && c <= 'Z') {
  1.3720 +                                folded += 0x20;
  1.3721 +                            }
  1.3722 +                            if (folded != e) {
  1.3723 +                                // [NOCPP[
  1.3724 +                                errHtml4LtSlashInRcdata(folded);
  1.3725 +                                // ]NOCPP]
  1.3726 +                                tokenHandler.characters(Tokenizer.LT_SOLIDUS,
  1.3727 +                                        0, 2);
  1.3728 +                                emitStrBuf();
  1.3729 +                                cstart = pos;
  1.3730 +                                reconsume = true;
  1.3731 +                                state = transition(state, returnState, reconsume, pos);
  1.3732 +                                continue stateloop;
  1.3733 +                            }
  1.3734 +                            appendStrBuf(c);
  1.3735 +                            index++;
  1.3736 +                            continue;
  1.3737 +                        } else {
  1.3738 +                            endTag = true;
  1.3739 +                            // XXX replace contentModelElement with different
  1.3740 +                            // type
  1.3741 +                            tagName = endTagExpectation;
  1.3742 +                            switch (c) {
  1.3743 +                                case '\r':
  1.3744 +                                    silentCarriageReturn();
  1.3745 +                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
  1.3746 +                                    break stateloop;
  1.3747 +                                case '\n':
  1.3748 +                                    silentLineFeed();
  1.3749 +                                    // fall thru
  1.3750 +                                case ' ':
  1.3751 +                                case '\t':
  1.3752 +                                case '\u000C':
  1.3753 +                                    /*
  1.3754 +                                     * U+0009 CHARACTER TABULATION U+000A LINE
  1.3755 +                                     * FEED (LF) U+000C FORM FEED (FF) U+0020
  1.3756 +                                     * SPACE If the current end tag token is an
  1.3757 +                                     * appropriate end tag token, then switch to
  1.3758 +                                     * the before attribute name state.
  1.3759 +                                     */
  1.3760 +                                    state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos);
  1.3761 +                                    continue stateloop;
  1.3762 +                                case '/':
  1.3763 +                                    /*
  1.3764 +                                     * U+002F SOLIDUS (/) If the current end tag
  1.3765 +                                     * token is an appropriate end tag token,
  1.3766 +                                     * then switch to the self-closing start tag
  1.3767 +                                     * state.
  1.3768 +                                     */
  1.3769 +                                    state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos);
  1.3770 +                                    continue stateloop;
  1.3771 +                                case '>':
  1.3772 +                                    /*
  1.3773 +                                     * U+003E GREATER-THAN SIGN (>) If the
  1.3774 +                                     * current end tag token is an appropriate
  1.3775 +                                     * end tag token, then emit the current tag
  1.3776 +                                     * token and switch to the data state.
  1.3777 +                                     */
  1.3778 +                                    state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos);
  1.3779 +                                    if (shouldSuspend) {
  1.3780 +                                        break stateloop;
  1.3781 +                                    }
  1.3782 +                                    continue stateloop;
  1.3783 +                                default:
  1.3784 +                                    /*
  1.3785 +                                     * Emit a U+003C LESS-THAN SIGN character
  1.3786 +                                     * token, a U+002F SOLIDUS character token,
  1.3787 +                                     * a character token for each of the
  1.3788 +                                     * characters in the temporary buffer (in
  1.3789 +                                     * the order they were added to the buffer),
  1.3790 +                                     * and reconsume the current input character
  1.3791 +                                     * in the RAWTEXT state.
  1.3792 +                                     */
  1.3793 +                                    // [NOCPP[
  1.3794 +                                    errWarnLtSlashInRcdata();
  1.3795 +                                    // ]NOCPP]
  1.3796 +                                    tokenHandler.characters(
  1.3797 +                                            Tokenizer.LT_SOLIDUS, 0, 2);
  1.3798 +                                    emitStrBuf();
  1.3799 +                                    if (c == '\u0000') {
  1.3800 +                                        emitReplacementCharacter(buf, pos);
  1.3801 +                                    } else {
  1.3802 +                                        cstart = pos; // don't drop the
  1.3803 +                                        // character
  1.3804 +                                    }
  1.3805 +                                    state = transition(state, returnState, reconsume, pos);
  1.3806 +                                    continue stateloop;
  1.3807 +                            }
  1.3808 +                        }
  1.3809 +                    }
  1.3810 +                    // XXX reorder point
  1.3811 +                    // BEGIN HOTSPOT WORKAROUND
  1.3812 +                case BOGUS_COMMENT:
  1.3813 +                    boguscommentloop: for (;;) {
  1.3814 +                        if (reconsume) {
  1.3815 +                            reconsume = false;
  1.3816 +                        } else {
  1.3817 +                            if (++pos == endPos) {
  1.3818 +                                break stateloop;
  1.3819 +                            }
  1.3820 +                            c = checkChar(buf, pos);
  1.3821 +                        }
  1.3822 +                        /*
  1.3823 +                         * Consume every character up to and including the first
  1.3824 +                         * U+003E GREATER-THAN SIGN character (>) or the end of
  1.3825 +                         * the file (EOF), whichever comes first. Emit a comment
  1.3826 +                         * token whose data is the concatenation of all the
  1.3827 +                         * characters starting from and including the character
  1.3828 +                         * that caused the state machine to switch into the
  1.3829 +                         * bogus comment state, up to and including the
  1.3830 +                         * character immediately before the last consumed
  1.3831 +                         * character (i.e. up to the character just before the
  1.3832 +                         * U+003E or EOF character). (If the comment was started
  1.3833 +                         * by the end of the file (EOF), the token is empty.)
  1.3834 +                         * 
  1.3835 +                         * Switch to the data state.
  1.3836 +                         * 
  1.3837 +                         * If the end of the file was reached, reconsume the EOF
  1.3838 +                         * character.
  1.3839 +                         */
  1.3840 +                        switch (c) {
  1.3841 +                            case '>':
  1.3842 +                                emitComment(0, pos);
  1.3843 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.3844 +                                continue stateloop;
  1.3845 +                            case '-':
  1.3846 +                                appendLongStrBuf(c);
  1.3847 +                                state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
  1.3848 +                                break boguscommentloop;
  1.3849 +                            case '\r':
  1.3850 +                                appendLongStrBufCarriageReturn();
  1.3851 +                                break stateloop;
  1.3852 +                            case '\n':
  1.3853 +                                appendLongStrBufLineFeed();
  1.3854 +                                continue;
  1.3855 +                            case '\u0000':
  1.3856 +                                c = '\uFFFD';
  1.3857 +                                // fall thru
  1.3858 +                            default:
  1.3859 +                                appendLongStrBuf(c);
  1.3860 +                                continue;
  1.3861 +                        }
  1.3862 +                    }
  1.3863 +                    // FALLTHRU DON'T REORDER
  1.3864 +                case BOGUS_COMMENT_HYPHEN:
  1.3865 +                    boguscommenthyphenloop: for (;;) {
  1.3866 +                        if (++pos == endPos) {
  1.3867 +                            break stateloop;
  1.3868 +                        }
  1.3869 +                        c = checkChar(buf, pos);
  1.3870 +                        switch (c) {
  1.3871 +                            case '>':
  1.3872 +                                // [NOCPP[
  1.3873 +                                maybeAppendSpaceToBogusComment();
  1.3874 +                                // ]NOCPP]
  1.3875 +                                emitComment(0, pos);
  1.3876 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.3877 +                                continue stateloop;
  1.3878 +                            case '-':
  1.3879 +                                appendSecondHyphenToBogusComment();
  1.3880 +                                continue boguscommenthyphenloop;
  1.3881 +                            case '\r':
  1.3882 +                                appendLongStrBufCarriageReturn();
  1.3883 +                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.3884 +                                break stateloop;
  1.3885 +                            case '\n':
  1.3886 +                                appendLongStrBufLineFeed();
  1.3887 +                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.3888 +                                continue stateloop;
  1.3889 +                            case '\u0000':
  1.3890 +                                c = '\uFFFD';
  1.3891 +                                // fall thru
  1.3892 +                            default:
  1.3893 +                                appendLongStrBuf(c);
  1.3894 +                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.3895 +                                continue stateloop;
  1.3896 +                        }
  1.3897 +                    }
  1.3898 +                    // XXX reorder point
  1.3899 +                case SCRIPT_DATA:
  1.3900 +                    scriptdataloop: for (;;) {
  1.3901 +                        if (reconsume) {
  1.3902 +                            reconsume = false;
  1.3903 +                        } else {
  1.3904 +                            if (++pos == endPos) {
  1.3905 +                                break stateloop;
  1.3906 +                            }
  1.3907 +                            c = checkChar(buf, pos);
  1.3908 +                        }
  1.3909 +                        switch (c) {
  1.3910 +                            case '<':
  1.3911 +                                /*
  1.3912 +                                 * U+003C LESS-THAN SIGN (<) Switch to the
  1.3913 +                                 * script data less-than sign state.
  1.3914 +                                 */
  1.3915 +                                flushChars(buf, pos);
  1.3916 +                                returnState = state;
  1.3917 +                                state = transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
  1.3918 +                                break scriptdataloop; // FALL THRU continue
  1.3919 +                            // stateloop;
  1.3920 +                            case '\u0000':
  1.3921 +                                emitReplacementCharacter(buf, pos);
  1.3922 +                                continue;
  1.3923 +                            case '\r':
  1.3924 +                                emitCarriageReturn(buf, pos);
  1.3925 +                                break stateloop;
  1.3926 +                            case '\n':
  1.3927 +                                silentLineFeed();
  1.3928 +                            default:
  1.3929 +                                /*
  1.3930 +                                 * Anything else Emit the current input
  1.3931 +                                 * character as a character token. Stay in the
  1.3932 +                                 * script data state.
  1.3933 +                                 */
  1.3934 +                                continue;
  1.3935 +                        }
  1.3936 +                    }
  1.3937 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.3938 +                case SCRIPT_DATA_LESS_THAN_SIGN:
  1.3939 +                    scriptdatalessthansignloop: for (;;) {
  1.3940 +                        if (++pos == endPos) {
  1.3941 +                            break stateloop;
  1.3942 +                        }
  1.3943 +                        c = checkChar(buf, pos);
  1.3944 +                        switch (c) {
  1.3945 +                            case '/':
  1.3946 +                                /*
  1.3947 +                                 * U+002F SOLIDUS (/) Set the temporary buffer
  1.3948 +                                 * to the empty string. Switch to the script
  1.3949 +                                 * data end tag open state.
  1.3950 +                                 */
  1.3951 +                                index = 0;
  1.3952 +                                clearStrBuf();
  1.3953 +                                state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
  1.3954 +                                continue stateloop;
  1.3955 +                            case '!':
  1.3956 +                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
  1.3957 +                                cstart = pos;
  1.3958 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos);
  1.3959 +                                break scriptdatalessthansignloop; // FALL THRU
  1.3960 +                            // continue
  1.3961 +                            // stateloop;
  1.3962 +                            default:
  1.3963 +                                /*
  1.3964 +                                 * Otherwise, emit a U+003C LESS-THAN SIGN
  1.3965 +                                 * character token
  1.3966 +                                 */
  1.3967 +                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
  1.3968 +                                /*
  1.3969 +                                 * and reconsume the current input character in
  1.3970 +                                 * the script data state.
  1.3971 +                                 */
  1.3972 +                                cstart = pos;
  1.3973 +                                reconsume = true;
  1.3974 +                                state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
  1.3975 +                                continue stateloop;
  1.3976 +                        }
  1.3977 +                    }
  1.3978 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.3979 +                case SCRIPT_DATA_ESCAPE_START:
  1.3980 +                    scriptdataescapestartloop: for (;;) {
  1.3981 +                        if (++pos == endPos) {
  1.3982 +                            break stateloop;
  1.3983 +                        }
  1.3984 +                        c = checkChar(buf, pos);
  1.3985 +                        /*
  1.3986 +                         * Consume the next input character:
  1.3987 +                         */
  1.3988 +                        switch (c) {
  1.3989 +                            case '-':
  1.3990 +                                /*
  1.3991 +                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
  1.3992 +                                 * HYPHEN-MINUS character token. Switch to the
  1.3993 +                                 * script data escape start dash state.
  1.3994 +                                 */
  1.3995 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos);
  1.3996 +                                break scriptdataescapestartloop; // FALL THRU
  1.3997 +                            // continue
  1.3998 +                            // stateloop;
  1.3999 +                            default:
  1.4000 +                                /*
  1.4001 +                                 * Anything else Reconsume the current input
  1.4002 +                                 * character in the script data state.
  1.4003 +                                 */
  1.4004 +                                reconsume = true;
  1.4005 +                                state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
  1.4006 +                                continue stateloop;
  1.4007 +                        }
  1.4008 +                    }
  1.4009 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4010 +                case SCRIPT_DATA_ESCAPE_START_DASH:
  1.4011 +                    scriptdataescapestartdashloop: for (;;) {
  1.4012 +                        if (++pos == endPos) {
  1.4013 +                            break stateloop;
  1.4014 +                        }
  1.4015 +                        c = checkChar(buf, pos);
  1.4016 +                        /*
  1.4017 +                         * Consume the next input character:
  1.4018 +                         */
  1.4019 +                        switch (c) {
  1.4020 +                            case '-':
  1.4021 +                                /*
  1.4022 +                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
  1.4023 +                                 * HYPHEN-MINUS character token. Switch to the
  1.4024 +                                 * script data escaped dash dash state.
  1.4025 +                                 */
  1.4026 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
  1.4027 +                                break scriptdataescapestartdashloop;
  1.4028 +                            // continue stateloop;
  1.4029 +                            default:
  1.4030 +                                /*
  1.4031 +                                 * Anything else Reconsume the current input
  1.4032 +                                 * character in the script data state.
  1.4033 +                                 */
  1.4034 +                                reconsume = true;
  1.4035 +                                state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
  1.4036 +                                continue stateloop;
  1.4037 +                        }
  1.4038 +                    }
  1.4039 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4040 +                case SCRIPT_DATA_ESCAPED_DASH_DASH:
  1.4041 +                    scriptdataescapeddashdashloop: for (;;) {
  1.4042 +                        if (++pos == endPos) {
  1.4043 +                            break stateloop;
  1.4044 +                        }
  1.4045 +                        c = checkChar(buf, pos);
  1.4046 +                        /*
  1.4047 +                         * Consume the next input character:
  1.4048 +                         */
  1.4049 +                        switch (c) {
  1.4050 +                            case '-':
  1.4051 +                                /*
  1.4052 +                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
  1.4053 +                                 * HYPHEN-MINUS character token. Stay in the
  1.4054 +                                 * script data escaped dash dash state.
  1.4055 +                                 */
  1.4056 +                                continue;
  1.4057 +                            case '<':
  1.4058 +                                /*
  1.4059 +                                 * U+003C LESS-THAN SIGN (<) Switch to the
  1.4060 +                                 * script data escaped less-than sign state.
  1.4061 +                                 */
  1.4062 +                                flushChars(buf, pos);
  1.4063 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
  1.4064 +                                continue stateloop;
  1.4065 +                            case '>':
  1.4066 +                                /*
  1.4067 +                                 * U+003E GREATER-THAN SIGN (>) Emit a U+003E
  1.4068 +                                 * GREATER-THAN SIGN character token. Switch to
  1.4069 +                                 * the script data state.
  1.4070 +                                 */
  1.4071 +                                state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
  1.4072 +                                continue stateloop;
  1.4073 +                            case '\u0000':
  1.4074 +                                emitReplacementCharacter(buf, pos);
  1.4075 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4076 +                                break scriptdataescapeddashdashloop;
  1.4077 +                            case '\r':
  1.4078 +                                emitCarriageReturn(buf, pos);
  1.4079 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4080 +                                break stateloop;
  1.4081 +                            case '\n':
  1.4082 +                                silentLineFeed();
  1.4083 +                            default:
  1.4084 +                                /*
  1.4085 +                                 * Anything else Emit the current input
  1.4086 +                                 * character as a character token. Switch to the
  1.4087 +                                 * script data escaped state.
  1.4088 +                                 */
  1.4089 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4090 +                                break scriptdataescapeddashdashloop;
  1.4091 +                            // continue stateloop;
  1.4092 +                        }
  1.4093 +                    }
  1.4094 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4095 +                case SCRIPT_DATA_ESCAPED:
  1.4096 +                    scriptdataescapedloop: for (;;) {
  1.4097 +                        if (reconsume) {
  1.4098 +                            reconsume = false;
  1.4099 +                        } else {
  1.4100 +                            if (++pos == endPos) {
  1.4101 +                                break stateloop;
  1.4102 +                            }
  1.4103 +                            c = checkChar(buf, pos);
  1.4104 +                        }
  1.4105 +                        /*
  1.4106 +                         * Consume the next input character:
  1.4107 +                         */
  1.4108 +                        switch (c) {
  1.4109 +                            case '-':
  1.4110 +                                /*
  1.4111 +                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
  1.4112 +                                 * HYPHEN-MINUS character token. Switch to the
  1.4113 +                                 * script data escaped dash state.
  1.4114 +                                 */
  1.4115 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
  1.4116 +                                break scriptdataescapedloop; // FALL THRU
  1.4117 +                            // continue
  1.4118 +                            // stateloop;
  1.4119 +                            case '<':
  1.4120 +                                /*
  1.4121 +                                 * U+003C LESS-THAN SIGN (<) Switch to the
  1.4122 +                                 * script data escaped less-than sign state.
  1.4123 +                                 */
  1.4124 +                                flushChars(buf, pos);
  1.4125 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
  1.4126 +                                continue stateloop;
  1.4127 +                            case '\u0000':
  1.4128 +                                emitReplacementCharacter(buf, pos);
  1.4129 +                                continue;
  1.4130 +                            case '\r':
  1.4131 +                                emitCarriageReturn(buf, pos);
  1.4132 +                                break stateloop;
  1.4133 +                            case '\n':
  1.4134 +                                silentLineFeed();
  1.4135 +                            default:
  1.4136 +                                /*
  1.4137 +                                 * Anything else Emit the current input
  1.4138 +                                 * character as a character token. Stay in the
  1.4139 +                                 * script data escaped state.
  1.4140 +                                 */
  1.4141 +                                continue;
  1.4142 +                        }
  1.4143 +                    }
  1.4144 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4145 +                case SCRIPT_DATA_ESCAPED_DASH:
  1.4146 +                    scriptdataescapeddashloop: for (;;) {
  1.4147 +                        if (++pos == endPos) {
  1.4148 +                            break stateloop;
  1.4149 +                        }
  1.4150 +                        c = checkChar(buf, pos);
  1.4151 +                        /*
  1.4152 +                         * Consume the next input character:
  1.4153 +                         */
  1.4154 +                        switch (c) {
  1.4155 +                            case '-':
  1.4156 +                                /*
  1.4157 +                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
  1.4158 +                                 * HYPHEN-MINUS character token. Switch to the
  1.4159 +                                 * script data escaped dash dash state.
  1.4160 +                                 */
  1.4161 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos);
  1.4162 +                                continue stateloop;
  1.4163 +                            case '<':
  1.4164 +                                /*
  1.4165 +                                 * U+003C LESS-THAN SIGN (<) Switch to the
  1.4166 +                                 * script data escaped less-than sign state.
  1.4167 +                                 */
  1.4168 +                                flushChars(buf, pos);
  1.4169 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
  1.4170 +                                break scriptdataescapeddashloop;
  1.4171 +                            // continue stateloop;
  1.4172 +                            case '\u0000':
  1.4173 +                                emitReplacementCharacter(buf, pos);
  1.4174 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4175 +                                continue stateloop;
  1.4176 +                            case '\r':
  1.4177 +                                emitCarriageReturn(buf, pos);
  1.4178 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4179 +                                break stateloop;
  1.4180 +                            case '\n':
  1.4181 +                                silentLineFeed();
  1.4182 +                            default:
  1.4183 +                                /*
  1.4184 +                                 * Anything else Emit the current input
  1.4185 +                                 * character as a character token. Switch to the
  1.4186 +                                 * script data escaped state.
  1.4187 +                                 */
  1.4188 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4189 +                                continue stateloop;
  1.4190 +                        }
  1.4191 +                    }
  1.4192 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4193 +                case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
  1.4194 +                    scriptdataescapedlessthanloop: for (;;) {
  1.4195 +                        if (++pos == endPos) {
  1.4196 +                            break stateloop;
  1.4197 +                        }
  1.4198 +                        c = checkChar(buf, pos);
  1.4199 +                        /*
  1.4200 +                         * Consume the next input character:
  1.4201 +                         */
  1.4202 +                        switch (c) {
  1.4203 +                            case '/':
  1.4204 +                                /*
  1.4205 +                                 * U+002F SOLIDUS (/) Set the temporary buffer
  1.4206 +                                 * to the empty string. Switch to the script
  1.4207 +                                 * data escaped end tag open state.
  1.4208 +                                 */
  1.4209 +                                index = 0;
  1.4210 +                                clearStrBuf();
  1.4211 +                                returnState = Tokenizer.SCRIPT_DATA_ESCAPED;
  1.4212 +                                state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos);
  1.4213 +                                continue stateloop;
  1.4214 +                            case 'S':
  1.4215 +                            case 's':
  1.4216 +                                /*
  1.4217 +                                 * U+0041 LATIN CAPITAL LETTER A through to
  1.4218 +                                 * U+005A LATIN CAPITAL LETTER Z Emit a U+003C
  1.4219 +                                 * LESS-THAN SIGN character token and the
  1.4220 +                                 * current input character as a character token.
  1.4221 +                                 */
  1.4222 +                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
  1.4223 +                                cstart = pos;
  1.4224 +                                index = 1;
  1.4225 +                                /*
  1.4226 +                                 * Set the temporary buffer to the empty string.
  1.4227 +                                 * Append the lowercase version of the current
  1.4228 +                                 * input character (add 0x0020 to the
  1.4229 +                                 * character's code point) to the temporary
  1.4230 +                                 * buffer. Switch to the script data double
  1.4231 +                                 * escape start state.
  1.4232 +                                 */
  1.4233 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos);
  1.4234 +                                break scriptdataescapedlessthanloop;
  1.4235 +                            // continue stateloop;
  1.4236 +                            default:
  1.4237 +                                /*
  1.4238 +                                 * Anything else Emit a U+003C LESS-THAN SIGN
  1.4239 +                                 * character token and reconsume the current
  1.4240 +                                 * input character in the script data escaped
  1.4241 +                                 * state.
  1.4242 +                                 */
  1.4243 +                                tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
  1.4244 +                                cstart = pos;
  1.4245 +                                reconsume = true;
  1.4246 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4247 +                                continue stateloop;
  1.4248 +                        }
  1.4249 +                    }
  1.4250 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4251 +                case SCRIPT_DATA_DOUBLE_ESCAPE_START:
  1.4252 +                    scriptdatadoubleescapestartloop: for (;;) {
  1.4253 +                        if (++pos == endPos) {
  1.4254 +                            break stateloop;
  1.4255 +                        }
  1.4256 +                        c = checkChar(buf, pos);
  1.4257 +                        assert index > 0;
  1.4258 +                        if (index < 6) { // SCRIPT_ARR.length
  1.4259 +                            char folded = c;
  1.4260 +                            if (c >= 'A' && c <= 'Z') {
  1.4261 +                                folded += 0x20;
  1.4262 +                            }
  1.4263 +                            if (folded != Tokenizer.SCRIPT_ARR[index]) {
  1.4264 +                                reconsume = true;
  1.4265 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4266 +                                continue stateloop;
  1.4267 +                            }
  1.4268 +                            index++;
  1.4269 +                            continue;
  1.4270 +                        }
  1.4271 +                        switch (c) {
  1.4272 +                            case '\r':
  1.4273 +                                emitCarriageReturn(buf, pos);
  1.4274 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4275 +                                break stateloop;
  1.4276 +                            case '\n':
  1.4277 +                                silentLineFeed();
  1.4278 +                            case ' ':
  1.4279 +                            case '\t':
  1.4280 +                            case '\u000C':
  1.4281 +                            case '/':
  1.4282 +                            case '>':
  1.4283 +                                /*
  1.4284 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.4285 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.4286 +                                 * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
  1.4287 +                                 * (>) Emit the current input character as a
  1.4288 +                                 * character token. If the temporary buffer is
  1.4289 +                                 * the string "script", then switch to the
  1.4290 +                                 * script data double escaped state.
  1.4291 +                                 */
  1.4292 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4293 +                                break scriptdatadoubleescapestartloop;
  1.4294 +                            // continue stateloop;
  1.4295 +                            default:
  1.4296 +                                /*
  1.4297 +                                 * Anything else Reconsume the current input
  1.4298 +                                 * character in the script data escaped state.
  1.4299 +                                 */
  1.4300 +                                reconsume = true;
  1.4301 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4302 +                                continue stateloop;
  1.4303 +                        }
  1.4304 +                    }
  1.4305 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4306 +                case SCRIPT_DATA_DOUBLE_ESCAPED:
  1.4307 +                    scriptdatadoubleescapedloop: for (;;) {
  1.4308 +                        if (reconsume) {
  1.4309 +                            reconsume = false;
  1.4310 +                        } else {
  1.4311 +                            if (++pos == endPos) {
  1.4312 +                                break stateloop;
  1.4313 +                            }
  1.4314 +                            c = checkChar(buf, pos);
  1.4315 +                        }
  1.4316 +                        /*
  1.4317 +                         * Consume the next input character:
  1.4318 +                         */
  1.4319 +                        switch (c) {
  1.4320 +                            case '-':
  1.4321 +                                /*
  1.4322 +                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
  1.4323 +                                 * HYPHEN-MINUS character token. Switch to the
  1.4324 +                                 * script data double escaped dash state.
  1.4325 +                                 */
  1.4326 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos);
  1.4327 +                                break scriptdatadoubleescapedloop; // FALL THRU
  1.4328 +                            // continue
  1.4329 +                            // stateloop;
  1.4330 +                            case '<':
  1.4331 +                                /*
  1.4332 +                                 * U+003C LESS-THAN SIGN (<) Emit a U+003C
  1.4333 +                                 * LESS-THAN SIGN character token. Switch to the
  1.4334 +                                 * script data double escaped less-than sign
  1.4335 +                                 * state.
  1.4336 +                                 */
  1.4337 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
  1.4338 +                                continue stateloop;
  1.4339 +                            case '\u0000':
  1.4340 +                                emitReplacementCharacter(buf, pos);
  1.4341 +                                continue;
  1.4342 +                            case '\r':
  1.4343 +                                emitCarriageReturn(buf, pos);
  1.4344 +                                break stateloop;
  1.4345 +                            case '\n':
  1.4346 +                                silentLineFeed();
  1.4347 +                            default:
  1.4348 +                                /*
  1.4349 +                                 * Anything else Emit the current input
  1.4350 +                                 * character as a character token. Stay in the
  1.4351 +                                 * script data double escaped state.
  1.4352 +                                 */
  1.4353 +                                continue;
  1.4354 +                        }
  1.4355 +                    }
  1.4356 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4357 +                case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
  1.4358 +                    scriptdatadoubleescapeddashloop: for (;;) {
  1.4359 +                        if (++pos == endPos) {
  1.4360 +                            break stateloop;
  1.4361 +                        }
  1.4362 +                        c = checkChar(buf, pos);
  1.4363 +                        /*
  1.4364 +                         * Consume the next input character:
  1.4365 +                         */
  1.4366 +                        switch (c) {
  1.4367 +                            case '-':
  1.4368 +                                /*
  1.4369 +                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
  1.4370 +                                 * HYPHEN-MINUS character token. Switch to the
  1.4371 +                                 * script data double escaped dash dash state.
  1.4372 +                                 */
  1.4373 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos);
  1.4374 +                                break scriptdatadoubleescapeddashloop;
  1.4375 +                            // continue stateloop;
  1.4376 +                            case '<':
  1.4377 +                                /*
  1.4378 +                                 * U+003C LESS-THAN SIGN (<) Emit a U+003C
  1.4379 +                                 * LESS-THAN SIGN character token. Switch to the
  1.4380 +                                 * script data double escaped less-than sign
  1.4381 +                                 * state.
  1.4382 +                                 */
  1.4383 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
  1.4384 +                                continue stateloop;
  1.4385 +                            case '\u0000':
  1.4386 +                                emitReplacementCharacter(buf, pos);
  1.4387 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4388 +                                continue stateloop;
  1.4389 +                            case '\r':
  1.4390 +                                emitCarriageReturn(buf, pos);
  1.4391 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4392 +                                break stateloop;
  1.4393 +                            case '\n':
  1.4394 +                                silentLineFeed();
  1.4395 +                            default:
  1.4396 +                                /*
  1.4397 +                                 * Anything else Emit the current input
  1.4398 +                                 * character as a character token. Switch to the
  1.4399 +                                 * script data double escaped state.
  1.4400 +                                 */
  1.4401 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4402 +                                continue stateloop;
  1.4403 +                        }
  1.4404 +                    }
  1.4405 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4406 +                case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
  1.4407 +                    scriptdatadoubleescapeddashdashloop: for (;;) {
  1.4408 +                        if (++pos == endPos) {
  1.4409 +                            break stateloop;
  1.4410 +                        }
  1.4411 +                        c = checkChar(buf, pos);
  1.4412 +                        /*
  1.4413 +                         * Consume the next input character:
  1.4414 +                         */
  1.4415 +                        switch (c) {
  1.4416 +                            case '-':
  1.4417 +                                /*
  1.4418 +                                 * U+002D HYPHEN-MINUS (-) Emit a U+002D
  1.4419 +                                 * HYPHEN-MINUS character token. Stay in the
  1.4420 +                                 * script data double escaped dash dash state.
  1.4421 +                                 */
  1.4422 +                                continue;
  1.4423 +                            case '<':
  1.4424 +                                /*
  1.4425 +                                 * U+003C LESS-THAN SIGN (<) Emit a U+003C
  1.4426 +                                 * LESS-THAN SIGN character token. Switch to the
  1.4427 +                                 * script data double escaped less-than sign
  1.4428 +                                 * state.
  1.4429 +                                 */
  1.4430 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos);
  1.4431 +                                break scriptdatadoubleescapeddashdashloop;
  1.4432 +                            case '>':
  1.4433 +                                /*
  1.4434 +                                 * U+003E GREATER-THAN SIGN (>) Emit a U+003E
  1.4435 +                                 * GREATER-THAN SIGN character token. Switch to
  1.4436 +                                 * the script data state.
  1.4437 +                                 */
  1.4438 +                                state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos);
  1.4439 +                                continue stateloop;
  1.4440 +                            case '\u0000':
  1.4441 +                                emitReplacementCharacter(buf, pos);
  1.4442 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4443 +                                continue stateloop;
  1.4444 +                            case '\r':
  1.4445 +                                emitCarriageReturn(buf, pos);
  1.4446 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4447 +                                break stateloop;
  1.4448 +                            case '\n':
  1.4449 +                                silentLineFeed();
  1.4450 +                            default:
  1.4451 +                                /*
  1.4452 +                                 * Anything else Emit the current input
  1.4453 +                                 * character as a character token. Switch to the
  1.4454 +                                 * script data double escaped state.
  1.4455 +                                 */
  1.4456 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4457 +                                continue stateloop;
  1.4458 +                        }
  1.4459 +                    }
  1.4460 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4461 +                case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
  1.4462 +                    scriptdatadoubleescapedlessthanloop: for (;;) {
  1.4463 +                        if (++pos == endPos) {
  1.4464 +                            break stateloop;
  1.4465 +                        }
  1.4466 +                        c = checkChar(buf, pos);
  1.4467 +                        /*
  1.4468 +                         * Consume the next input character:
  1.4469 +                         */
  1.4470 +                        switch (c) {
  1.4471 +                            case '/':
  1.4472 +                                /*
  1.4473 +                                 * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS
  1.4474 +                                 * character token. Set the temporary buffer to
  1.4475 +                                 * the empty string. Switch to the script data
  1.4476 +                                 * double escape end state.
  1.4477 +                                 */
  1.4478 +                                index = 0;
  1.4479 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos);
  1.4480 +                                break scriptdatadoubleescapedlessthanloop;
  1.4481 +                            default:
  1.4482 +                                /*
  1.4483 +                                 * Anything else Reconsume the current input
  1.4484 +                                 * character in the script data double escaped
  1.4485 +                                 * state.
  1.4486 +                                 */
  1.4487 +                                reconsume = true;
  1.4488 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4489 +                                continue stateloop;
  1.4490 +                        }
  1.4491 +                    }
  1.4492 +                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
  1.4493 +                case SCRIPT_DATA_DOUBLE_ESCAPE_END:
  1.4494 +                    scriptdatadoubleescapeendloop: for (;;) {
  1.4495 +                        if (++pos == endPos) {
  1.4496 +                            break stateloop;
  1.4497 +                        }
  1.4498 +                        c = checkChar(buf, pos);
  1.4499 +                        if (index < 6) { // SCRIPT_ARR.length
  1.4500 +                            char folded = c;
  1.4501 +                            if (c >= 'A' && c <= 'Z') {
  1.4502 +                                folded += 0x20;
  1.4503 +                            }
  1.4504 +                            if (folded != Tokenizer.SCRIPT_ARR[index]) {
  1.4505 +                                reconsume = true;
  1.4506 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4507 +                                continue stateloop;
  1.4508 +                            }
  1.4509 +                            index++;
  1.4510 +                            continue;
  1.4511 +                        }
  1.4512 +                        switch (c) {
  1.4513 +                            case '\r':
  1.4514 +                                emitCarriageReturn(buf, pos);
  1.4515 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4516 +                                break stateloop;
  1.4517 +                            case '\n':
  1.4518 +                                silentLineFeed();
  1.4519 +                            case ' ':
  1.4520 +                            case '\t':
  1.4521 +                            case '\u000C':
  1.4522 +                            case '/':
  1.4523 +                            case '>':
  1.4524 +                                /*
  1.4525 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.4526 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.4527 +                                 * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN
  1.4528 +                                 * (>) Emit the current input character as a
  1.4529 +                                 * character token. If the temporary buffer is
  1.4530 +                                 * the string "script", then switch to the
  1.4531 +                                 * script data escaped state.
  1.4532 +                                 */
  1.4533 +                                state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos);
  1.4534 +                                continue stateloop;
  1.4535 +                            default:
  1.4536 +                                /*
  1.4537 +                                 * Anything else Reconsume the current input character
  1.4538 +                                 * in the script data double escaped state.
  1.4539 +                                 */
  1.4540 +                                reconsume = true;
  1.4541 +                                state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
  1.4542 +                                continue stateloop;
  1.4543 +                        }
  1.4544 +                    }
  1.4545 +                    // XXX reorder point
  1.4546 +                case MARKUP_DECLARATION_OCTYPE:
  1.4547 +                    markupdeclarationdoctypeloop: for (;;) {
  1.4548 +                        if (++pos == endPos) {
  1.4549 +                            break stateloop;
  1.4550 +                        }
  1.4551 +                        c = checkChar(buf, pos);
  1.4552 +                        if (index < 6) { // OCTYPE.length
  1.4553 +                            char folded = c;
  1.4554 +                            if (c >= 'A' && c <= 'Z') {
  1.4555 +                                folded += 0x20;
  1.4556 +                            }
  1.4557 +                            if (folded == Tokenizer.OCTYPE[index]) {
  1.4558 +                                appendLongStrBuf(c);
  1.4559 +                            } else {
  1.4560 +                                errBogusComment();
  1.4561 +                                reconsume = true;
  1.4562 +                                state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
  1.4563 +                                continue stateloop;
  1.4564 +                            }
  1.4565 +                            index++;
  1.4566 +                            continue;
  1.4567 +                        } else {
  1.4568 +                            reconsume = true;
  1.4569 +                            state = transition(state, Tokenizer.DOCTYPE, reconsume, pos);
  1.4570 +                            break markupdeclarationdoctypeloop;
  1.4571 +                            // continue stateloop;
  1.4572 +                        }
  1.4573 +                    }
  1.4574 +                    // FALLTHRU DON'T REORDER
  1.4575 +                case DOCTYPE:
  1.4576 +                    doctypeloop: for (;;) {
  1.4577 +                        if (reconsume) {
  1.4578 +                            reconsume = false;
  1.4579 +                        } else {
  1.4580 +                            if (++pos == endPos) {
  1.4581 +                                break stateloop;
  1.4582 +                            }
  1.4583 +                            c = checkChar(buf, pos);
  1.4584 +                        }
  1.4585 +                        initDoctypeFields();
  1.4586 +                        /*
  1.4587 +                         * Consume the next input character:
  1.4588 +                         */
  1.4589 +                        switch (c) {
  1.4590 +                            case '\r':
  1.4591 +                                silentCarriageReturn();
  1.4592 +                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
  1.4593 +                                break stateloop;
  1.4594 +                            case '\n':
  1.4595 +                                silentLineFeed();
  1.4596 +                                // fall thru
  1.4597 +                            case ' ':
  1.4598 +                            case '\t':
  1.4599 +                            case '\u000C':
  1.4600 +                                /*
  1.4601 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.4602 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.4603 +                                 * Switch to the before DOCTYPE name state.
  1.4604 +                                 */
  1.4605 +                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
  1.4606 +                                break doctypeloop;
  1.4607 +                            // continue stateloop;
  1.4608 +                            default:
  1.4609 +                                /*
  1.4610 +                                 * Anything else Parse error.
  1.4611 +                                 */
  1.4612 +                                errMissingSpaceBeforeDoctypeName();
  1.4613 +                                /*
  1.4614 +                                 * Reconsume the current character in the before
  1.4615 +                                 * DOCTYPE name state.
  1.4616 +                                 */
  1.4617 +                                reconsume = true;
  1.4618 +                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos);
  1.4619 +                                break doctypeloop;
  1.4620 +                            // continue stateloop;
  1.4621 +                        }
  1.4622 +                    }
  1.4623 +                    // FALLTHRU DON'T REORDER
  1.4624 +                case BEFORE_DOCTYPE_NAME:
  1.4625 +                    beforedoctypenameloop: for (;;) {
  1.4626 +                        if (reconsume) {
  1.4627 +                            reconsume = false;
  1.4628 +                        } else {
  1.4629 +                            if (++pos == endPos) {
  1.4630 +                                break stateloop;
  1.4631 +                            }
  1.4632 +                            c = checkChar(buf, pos);
  1.4633 +                        }
  1.4634 +                        /*
  1.4635 +                         * Consume the next input character:
  1.4636 +                         */
  1.4637 +                        switch (c) {
  1.4638 +                            case '\r':
  1.4639 +                                silentCarriageReturn();
  1.4640 +                                break stateloop;
  1.4641 +                            case '\n':
  1.4642 +                                silentLineFeed();
  1.4643 +                                // fall thru
  1.4644 +                            case ' ':
  1.4645 +                            case '\t':
  1.4646 +                            case '\u000C':
  1.4647 +                                /*
  1.4648 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.4649 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
  1.4650 +                                 * in the before DOCTYPE name state.
  1.4651 +                                 */
  1.4652 +                                continue;
  1.4653 +                            case '>':
  1.4654 +                                /*
  1.4655 +                                 * U+003E GREATER-THAN SIGN (>) Parse error.
  1.4656 +                                 */
  1.4657 +                                errNamelessDoctype();
  1.4658 +                                /*
  1.4659 +                                 * Create a new DOCTYPE token. Set its
  1.4660 +                                 * force-quirks flag to on.
  1.4661 +                                 */
  1.4662 +                                forceQuirks = true;
  1.4663 +                                /*
  1.4664 +                                 * Emit the token.
  1.4665 +                                 */
  1.4666 +                                emitDoctypeToken(pos);
  1.4667 +                                /*
  1.4668 +                                 * Switch to the data state.
  1.4669 +                                 */
  1.4670 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.4671 +                                continue stateloop;
  1.4672 +                            case '\u0000':
  1.4673 +                                c = '\uFFFD';
  1.4674 +                                // fall thru
  1.4675 +                            default:
  1.4676 +                                if (c >= 'A' && c <= 'Z') {
  1.4677 +                                    /*
  1.4678 +                                     * U+0041 LATIN CAPITAL LETTER A through to
  1.4679 +                                     * U+005A LATIN CAPITAL LETTER Z Create a
  1.4680 +                                     * new DOCTYPE token. Set the token's name
  1.4681 +                                     * to the lowercase version of the input
  1.4682 +                                     * character (add 0x0020 to the character's
  1.4683 +                                     * code point).
  1.4684 +                                     */
  1.4685 +                                    c += 0x20;
  1.4686 +                                }
  1.4687 +                                /* Anything else Create a new DOCTYPE token. */
  1.4688 +                                /*
  1.4689 +                                 * Set the token's name to the current
  1.4690 +                                 * input character.
  1.4691 +                                 */
  1.4692 +                                clearStrBufAndAppend(c);
  1.4693 +                                /*
  1.4694 +                                 * Switch to the DOCTYPE name state.
  1.4695 +                                 */
  1.4696 +                                state = transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos);
  1.4697 +                                break beforedoctypenameloop;
  1.4698 +                            // continue stateloop;
  1.4699 +                        }
  1.4700 +                    }
  1.4701 +                    // FALLTHRU DON'T REORDER
  1.4702 +                case DOCTYPE_NAME:
  1.4703 +                    doctypenameloop: for (;;) {
  1.4704 +                        if (++pos == endPos) {
  1.4705 +                            break stateloop;
  1.4706 +                        }
  1.4707 +                        c = checkChar(buf, pos);
  1.4708 +                        /*
  1.4709 +                         * Consume the next input character:
  1.4710 +                         */
  1.4711 +                        switch (c) {
  1.4712 +                            case '\r':
  1.4713 +                                silentCarriageReturn();
  1.4714 +                                strBufToDoctypeName();
  1.4715 +                                state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
  1.4716 +                                break stateloop;
  1.4717 +                            case '\n':
  1.4718 +                                silentLineFeed();
  1.4719 +                                // fall thru
  1.4720 +                            case ' ':
  1.4721 +                            case '\t':
  1.4722 +                            case '\u000C':
  1.4723 +                                /*
  1.4724 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.4725 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.4726 +                                 * Switch to the after DOCTYPE name state.
  1.4727 +                                 */
  1.4728 +                                strBufToDoctypeName();
  1.4729 +                                state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos);
  1.4730 +                                break doctypenameloop;
  1.4731 +                            // continue stateloop;
  1.4732 +                            case '>':
  1.4733 +                                /*
  1.4734 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.4735 +                                 * DOCTYPE token.
  1.4736 +                                 */
  1.4737 +                                strBufToDoctypeName();
  1.4738 +                                emitDoctypeToken(pos);
  1.4739 +                                /*
  1.4740 +                                 * Switch to the data state.
  1.4741 +                                 */
  1.4742 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.4743 +                                continue stateloop;
  1.4744 +                            case '\u0000':
  1.4745 +                                c = '\uFFFD';
  1.4746 +                                // fall thru
  1.4747 +                            default:
  1.4748 +                                /*
  1.4749 +                                 * U+0041 LATIN CAPITAL LETTER A through to
  1.4750 +                                 * U+005A LATIN CAPITAL LETTER Z Append the
  1.4751 +                                 * lowercase version of the input character (add
  1.4752 +                                 * 0x0020 to the character's code point) to the
  1.4753 +                                 * current DOCTYPE token's name.
  1.4754 +                                 */
  1.4755 +                                if (c >= 'A' && c <= 'Z') {
  1.4756 +                                    c += 0x0020;
  1.4757 +                                }
  1.4758 +                                /*
  1.4759 +                                 * Anything else Append the current input
  1.4760 +                                 * character to the current DOCTYPE token's
  1.4761 +                                 * name.
  1.4762 +                                 */
  1.4763 +                                appendStrBuf(c);
  1.4764 +                                /*
  1.4765 +                                 * Stay in the DOCTYPE name state.
  1.4766 +                                 */
  1.4767 +                                continue;
  1.4768 +                        }
  1.4769 +                    }
  1.4770 +                    // FALLTHRU DON'T REORDER
  1.4771 +                case AFTER_DOCTYPE_NAME:
  1.4772 +                    afterdoctypenameloop: for (;;) {
  1.4773 +                        if (++pos == endPos) {
  1.4774 +                            break stateloop;
  1.4775 +                        }
  1.4776 +                        c = checkChar(buf, pos);
  1.4777 +                        /*
  1.4778 +                         * Consume the next input character:
  1.4779 +                         */
  1.4780 +                        switch (c) {
  1.4781 +                            case '\r':
  1.4782 +                                silentCarriageReturn();
  1.4783 +                                break stateloop;
  1.4784 +                            case '\n':
  1.4785 +                                silentLineFeed();
  1.4786 +                                // fall thru
  1.4787 +                            case ' ':
  1.4788 +                            case '\t':
  1.4789 +                            case '\u000C':
  1.4790 +                                /*
  1.4791 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.4792 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
  1.4793 +                                 * in the after DOCTYPE name state.
  1.4794 +                                 */
  1.4795 +                                continue;
  1.4796 +                            case '>':
  1.4797 +                                /*
  1.4798 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.4799 +                                 * DOCTYPE token.
  1.4800 +                                 */
  1.4801 +                                emitDoctypeToken(pos);
  1.4802 +                                /*
  1.4803 +                                 * Switch to the data state.
  1.4804 +                                 */
  1.4805 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.4806 +                                continue stateloop;
  1.4807 +                            case 'p':
  1.4808 +                            case 'P':
  1.4809 +                                index = 0;
  1.4810 +                                state = transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos);
  1.4811 +                                break afterdoctypenameloop;
  1.4812 +                            // continue stateloop;
  1.4813 +                            case 's':
  1.4814 +                            case 'S':
  1.4815 +                                index = 0;
  1.4816 +                                state = transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos);
  1.4817 +                                continue stateloop;
  1.4818 +                            default:
  1.4819 +                                /*
  1.4820 +                                 * Otherwise, this is the parse error.
  1.4821 +                                 */
  1.4822 +                                bogusDoctype();
  1.4823 +
  1.4824 +                                /*
  1.4825 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.4826 +                                 * on.
  1.4827 +                                 */
  1.4828 +                                // done by bogusDoctype();
  1.4829 +                                /*
  1.4830 +                                 * Switch to the bogus DOCTYPE state.
  1.4831 +                                 */
  1.4832 +                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
  1.4833 +                                continue stateloop;
  1.4834 +                        }
  1.4835 +                    }
  1.4836 +                    // FALLTHRU DON'T REORDER
  1.4837 +                case DOCTYPE_UBLIC:
  1.4838 +                    doctypeublicloop: for (;;) {
  1.4839 +                        if (++pos == endPos) {
  1.4840 +                            break stateloop;
  1.4841 +                        }
  1.4842 +                        c = checkChar(buf, pos);
  1.4843 +                        /*
  1.4844 +                         * If the six characters starting from the current input
  1.4845 +                         * character are an ASCII case-insensitive match for the
  1.4846 +                         * word "PUBLIC", then consume those characters and
  1.4847 +                         * switch to the before DOCTYPE public identifier state.
  1.4848 +                         */
  1.4849 +                        if (index < 5) { // UBLIC.length
  1.4850 +                            char folded = c;
  1.4851 +                            if (c >= 'A' && c <= 'Z') {
  1.4852 +                                folded += 0x20;
  1.4853 +                            }
  1.4854 +                            if (folded != Tokenizer.UBLIC[index]) {
  1.4855 +                                bogusDoctype();
  1.4856 +                                // forceQuirks = true;
  1.4857 +                                reconsume = true;
  1.4858 +                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
  1.4859 +                                continue stateloop;
  1.4860 +                            }
  1.4861 +                            index++;
  1.4862 +                            continue;
  1.4863 +                        } else {
  1.4864 +                            reconsume = true;
  1.4865 +                            state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
  1.4866 +                            break doctypeublicloop;
  1.4867 +                            // continue stateloop;
  1.4868 +                        }
  1.4869 +                    }
  1.4870 +                    // FALLTHRU DON'T REORDER
  1.4871 +                case AFTER_DOCTYPE_PUBLIC_KEYWORD:
  1.4872 +                    afterdoctypepublickeywordloop: for (;;) {
  1.4873 +                        if (reconsume) {
  1.4874 +                            reconsume = false;
  1.4875 +                        } else {
  1.4876 +                            if (++pos == endPos) {
  1.4877 +                                break stateloop;
  1.4878 +                            }
  1.4879 +                            c = checkChar(buf, pos);
  1.4880 +                        }
  1.4881 +                        /*
  1.4882 +                         * Consume the next input character:
  1.4883 +                         */
  1.4884 +                        switch (c) {
  1.4885 +                            case '\r':
  1.4886 +                                silentCarriageReturn();
  1.4887 +                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
  1.4888 +                                break stateloop;
  1.4889 +                            case '\n':
  1.4890 +                                silentLineFeed();
  1.4891 +                                // fall thru
  1.4892 +                            case ' ':
  1.4893 +                            case '\t':
  1.4894 +                            case '\u000C':
  1.4895 +                                /*
  1.4896 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.4897 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.4898 +                                 * Switch to the before DOCTYPE public
  1.4899 +                                 * identifier state.
  1.4900 +                                 */
  1.4901 +                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
  1.4902 +                                break afterdoctypepublickeywordloop;
  1.4903 +                            // FALL THROUGH continue stateloop
  1.4904 +                            case '"':
  1.4905 +                                /*
  1.4906 +                                 * U+0022 QUOTATION MARK (") Parse Error.
  1.4907 +                                 */
  1.4908 +                                errNoSpaceBetweenDoctypePublicKeywordAndQuote();
  1.4909 +                                /*
  1.4910 +                                 * Set the DOCTYPE token's public identifier to
  1.4911 +                                 * the empty string (not missing),
  1.4912 +                                 */
  1.4913 +                                clearLongStrBuf();
  1.4914 +                                /*
  1.4915 +                                 * then switch to the DOCTYPE public identifier
  1.4916 +                                 * (double-quoted) state.
  1.4917 +                                 */
  1.4918 +                                state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
  1.4919 +                                continue stateloop;
  1.4920 +                            case '\'':
  1.4921 +                                /*
  1.4922 +                                 * U+0027 APOSTROPHE (') Parse Error.
  1.4923 +                                 */
  1.4924 +                                errNoSpaceBetweenDoctypePublicKeywordAndQuote();
  1.4925 +                                /*
  1.4926 +                                 * Set the DOCTYPE token's public identifier to
  1.4927 +                                 * the empty string (not missing),
  1.4928 +                                 */
  1.4929 +                                clearLongStrBuf();
  1.4930 +                                /*
  1.4931 +                                 * then switch to the DOCTYPE public identifier
  1.4932 +                                 * (single-quoted) state.
  1.4933 +                                 */
  1.4934 +                                state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
  1.4935 +                                continue stateloop;
  1.4936 +                            case '>':
  1.4937 +                                /* U+003E GREATER-THAN SIGN (>) Parse error. */
  1.4938 +                                errExpectedPublicId();
  1.4939 +                                /*
  1.4940 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.4941 +                                 * on.
  1.4942 +                                 */
  1.4943 +                                forceQuirks = true;
  1.4944 +                                /*
  1.4945 +                                 * Emit that DOCTYPE token.
  1.4946 +                                 */
  1.4947 +                                emitDoctypeToken(pos);
  1.4948 +                                /*
  1.4949 +                                 * Switch to the data state.
  1.4950 +                                 */
  1.4951 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.4952 +                                continue stateloop;
  1.4953 +                            default:
  1.4954 +                                bogusDoctype();
  1.4955 +                                /*
  1.4956 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.4957 +                                 * on.
  1.4958 +                                 */
  1.4959 +                                // done by bogusDoctype();
  1.4960 +                                /*
  1.4961 +                                 * Switch to the bogus DOCTYPE state.
  1.4962 +                                 */
  1.4963 +                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
  1.4964 +                                continue stateloop;
  1.4965 +                        }
  1.4966 +                    }
  1.4967 +                    // FALLTHRU DON'T REORDER
  1.4968 +                case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
  1.4969 +                    beforedoctypepublicidentifierloop: for (;;) {
  1.4970 +                        if (++pos == endPos) {
  1.4971 +                            break stateloop;
  1.4972 +                        }
  1.4973 +                        c = checkChar(buf, pos);
  1.4974 +                        /*
  1.4975 +                         * Consume the next input character:
  1.4976 +                         */
  1.4977 +                        switch (c) {
  1.4978 +                            case '\r':
  1.4979 +                                silentCarriageReturn();
  1.4980 +                                break stateloop;
  1.4981 +                            case '\n':
  1.4982 +                                silentLineFeed();
  1.4983 +                                // fall thru
  1.4984 +                            case ' ':
  1.4985 +                            case '\t':
  1.4986 +                            case '\u000C':
  1.4987 +                                /*
  1.4988 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.4989 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
  1.4990 +                                 * in the before DOCTYPE public identifier
  1.4991 +                                 * state.
  1.4992 +                                 */
  1.4993 +                                continue;
  1.4994 +                            case '"':
  1.4995 +                                /*
  1.4996 +                                 * U+0022 QUOTATION MARK (") Set the DOCTYPE
  1.4997 +                                 * token's public identifier to the empty string
  1.4998 +                                 * (not missing),
  1.4999 +                                 */
  1.5000 +                                clearLongStrBuf();
  1.5001 +                                /*
  1.5002 +                                 * then switch to the DOCTYPE public identifier
  1.5003 +                                 * (double-quoted) state.
  1.5004 +                                 */
  1.5005 +                                state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
  1.5006 +                                break beforedoctypepublicidentifierloop;
  1.5007 +                            // continue stateloop;
  1.5008 +                            case '\'':
  1.5009 +                                /*
  1.5010 +                                 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
  1.5011 +                                 * public identifier to the empty string (not
  1.5012 +                                 * missing),
  1.5013 +                                 */
  1.5014 +                                clearLongStrBuf();
  1.5015 +                                /*
  1.5016 +                                 * then switch to the DOCTYPE public identifier
  1.5017 +                                 * (single-quoted) state.
  1.5018 +                                 */
  1.5019 +                                state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
  1.5020 +                                continue stateloop;
  1.5021 +                            case '>':
  1.5022 +                                /* U+003E GREATER-THAN SIGN (>) Parse error. */
  1.5023 +                                errExpectedPublicId();
  1.5024 +                                /*
  1.5025 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5026 +                                 * on.
  1.5027 +                                 */
  1.5028 +                                forceQuirks = true;
  1.5029 +                                /*
  1.5030 +                                 * Emit that DOCTYPE token.
  1.5031 +                                 */
  1.5032 +                                emitDoctypeToken(pos);
  1.5033 +                                /*
  1.5034 +                                 * Switch to the data state.
  1.5035 +                                 */
  1.5036 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5037 +                                continue stateloop;
  1.5038 +                            default:
  1.5039 +                                bogusDoctype();
  1.5040 +                                /*
  1.5041 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5042 +                                 * on.
  1.5043 +                                 */
  1.5044 +                                // done by bogusDoctype();
  1.5045 +                                /*
  1.5046 +                                 * Switch to the bogus DOCTYPE state.
  1.5047 +                                 */
  1.5048 +                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
  1.5049 +                                continue stateloop;
  1.5050 +                        }
  1.5051 +                    }
  1.5052 +                    // FALLTHRU DON'T REORDER
  1.5053 +                case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
  1.5054 +                    doctypepublicidentifierdoublequotedloop: for (;;) {
  1.5055 +                        if (++pos == endPos) {
  1.5056 +                            break stateloop;
  1.5057 +                        }
  1.5058 +                        c = checkChar(buf, pos);
  1.5059 +                        /*
  1.5060 +                         * Consume the next input character:
  1.5061 +                         */
  1.5062 +                        switch (c) {
  1.5063 +                            case '"':
  1.5064 +                                /*
  1.5065 +                                 * U+0022 QUOTATION MARK (") Switch to the after
  1.5066 +                                 * DOCTYPE public identifier state.
  1.5067 +                                 */
  1.5068 +                                publicIdentifier = longStrBufToString();
  1.5069 +                                state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
  1.5070 +                                break doctypepublicidentifierdoublequotedloop;
  1.5071 +                            // continue stateloop;
  1.5072 +                            case '>':
  1.5073 +                                /*
  1.5074 +                                 * U+003E GREATER-THAN SIGN (>) Parse error.
  1.5075 +                                 */
  1.5076 +                                errGtInPublicId();
  1.5077 +                                /*
  1.5078 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5079 +                                 * on.
  1.5080 +                                 */
  1.5081 +                                forceQuirks = true;
  1.5082 +                                /*
  1.5083 +                                 * Emit that DOCTYPE token.
  1.5084 +                                 */
  1.5085 +                                publicIdentifier = longStrBufToString();
  1.5086 +                                emitDoctypeToken(pos);
  1.5087 +                                /*
  1.5088 +                                 * Switch to the data state.
  1.5089 +                                 */
  1.5090 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5091 +                                continue stateloop;
  1.5092 +                            case '\r':
  1.5093 +                                appendLongStrBufCarriageReturn();
  1.5094 +                                break stateloop;
  1.5095 +                            case '\n':
  1.5096 +                                appendLongStrBufLineFeed();
  1.5097 +                                continue;
  1.5098 +                            case '\u0000':
  1.5099 +                                c = '\uFFFD';
  1.5100 +                                // fall thru
  1.5101 +                            default:
  1.5102 +                                /*
  1.5103 +                                 * Anything else Append the current input
  1.5104 +                                 * character to the current DOCTYPE token's
  1.5105 +                                 * public identifier.
  1.5106 +                                 */
  1.5107 +                                appendLongStrBuf(c);
  1.5108 +                                /*
  1.5109 +                                 * Stay in the DOCTYPE public identifier
  1.5110 +                                 * (double-quoted) state.
  1.5111 +                                 */
  1.5112 +                                continue;
  1.5113 +                        }
  1.5114 +                    }
  1.5115 +                    // FALLTHRU DON'T REORDER
  1.5116 +                case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
  1.5117 +                    afterdoctypepublicidentifierloop: for (;;) {
  1.5118 +                        if (++pos == endPos) {
  1.5119 +                            break stateloop;
  1.5120 +                        }
  1.5121 +                        c = checkChar(buf, pos);
  1.5122 +                        /*
  1.5123 +                         * Consume the next input character:
  1.5124 +                         */
  1.5125 +                        switch (c) {
  1.5126 +                            case '\r':
  1.5127 +                                silentCarriageReturn();
  1.5128 +                                state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
  1.5129 +                                break stateloop;
  1.5130 +                            case '\n':
  1.5131 +                                silentLineFeed();
  1.5132 +                                // fall thru
  1.5133 +                            case ' ':
  1.5134 +                            case '\t':
  1.5135 +                            case '\u000C':
  1.5136 +                                /*
  1.5137 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.5138 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.5139 +                                 * Switch to the between DOCTYPE public and
  1.5140 +                                 * system identifiers state.
  1.5141 +                                 */
  1.5142 +                                state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos);
  1.5143 +                                break afterdoctypepublicidentifierloop;
  1.5144 +                            // continue stateloop;
  1.5145 +                            case '>':
  1.5146 +                                /*
  1.5147 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.5148 +                                 * DOCTYPE token.
  1.5149 +                                 */
  1.5150 +                                emitDoctypeToken(pos);
  1.5151 +                                /*
  1.5152 +                                 * Switch to the data state.
  1.5153 +                                 */
  1.5154 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5155 +                                continue stateloop;
  1.5156 +                            case '"':
  1.5157 +                                /*
  1.5158 +                                 * U+0022 QUOTATION MARK (") Parse error.
  1.5159 +                                 */
  1.5160 +                                errNoSpaceBetweenPublicAndSystemIds();
  1.5161 +                                /*
  1.5162 +                                 * Set the DOCTYPE token's system identifier to
  1.5163 +                                 * the empty string (not missing),
  1.5164 +                                 */
  1.5165 +                                clearLongStrBuf();
  1.5166 +                                /*
  1.5167 +                                 * then switch to the DOCTYPE system identifier
  1.5168 +                                 * (double-quoted) state.
  1.5169 +                                 */
  1.5170 +                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
  1.5171 +                                continue stateloop;
  1.5172 +                            case '\'':
  1.5173 +                                /*
  1.5174 +                                 * U+0027 APOSTROPHE (') Parse error.
  1.5175 +                                 */
  1.5176 +                                errNoSpaceBetweenPublicAndSystemIds();
  1.5177 +                                /*
  1.5178 +                                 * Set the DOCTYPE token's system identifier to
  1.5179 +                                 * the empty string (not missing),
  1.5180 +                                 */
  1.5181 +                                clearLongStrBuf();
  1.5182 +                                /*
  1.5183 +                                 * then switch to the DOCTYPE system identifier
  1.5184 +                                 * (single-quoted) state.
  1.5185 +                                 */
  1.5186 +                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
  1.5187 +                                continue stateloop;
  1.5188 +                            default:
  1.5189 +                                bogusDoctype();
  1.5190 +                                /*
  1.5191 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5192 +                                 * on.
  1.5193 +                                 */
  1.5194 +                                // done by bogusDoctype();
  1.5195 +                                /*
  1.5196 +                                 * Switch to the bogus DOCTYPE state.
  1.5197 +                                 */
  1.5198 +                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
  1.5199 +                                continue stateloop;
  1.5200 +                        }
  1.5201 +                    }
  1.5202 +                    // FALLTHRU DON'T REORDER
  1.5203 +                case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
  1.5204 +                    betweendoctypepublicandsystemidentifiersloop: for (;;) {
  1.5205 +                        if (++pos == endPos) {
  1.5206 +                            break stateloop;
  1.5207 +                        }
  1.5208 +                        c = checkChar(buf, pos);
  1.5209 +                        /*
  1.5210 +                         * Consume the next input character:
  1.5211 +                         */
  1.5212 +                        switch (c) {
  1.5213 +                            case '\r':
  1.5214 +                                silentCarriageReturn();
  1.5215 +                                break stateloop;
  1.5216 +                            case '\n':
  1.5217 +                                silentLineFeed();
  1.5218 +                                // fall thru
  1.5219 +                            case ' ':
  1.5220 +                            case '\t':
  1.5221 +                            case '\u000C':
  1.5222 +                                /*
  1.5223 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.5224 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
  1.5225 +                                 * in the between DOCTYPE public and system
  1.5226 +                                 * identifiers state.
  1.5227 +                                 */
  1.5228 +                                continue;
  1.5229 +                            case '>':
  1.5230 +                                /*
  1.5231 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.5232 +                                 * DOCTYPE token.
  1.5233 +                                 */
  1.5234 +                                emitDoctypeToken(pos);
  1.5235 +                                /*
  1.5236 +                                 * Switch to the data state.
  1.5237 +                                 */
  1.5238 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5239 +                                continue stateloop;
  1.5240 +                            case '"':
  1.5241 +                                /*
  1.5242 +                                 * U+0022 QUOTATION MARK (") Set the DOCTYPE
  1.5243 +                                 * token's system identifier to the empty string
  1.5244 +                                 * (not missing),
  1.5245 +                                 */
  1.5246 +                                clearLongStrBuf();
  1.5247 +                                /*
  1.5248 +                                 * then switch to the DOCTYPE system identifier
  1.5249 +                                 * (double-quoted) state.
  1.5250 +                                 */
  1.5251 +                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
  1.5252 +                                break betweendoctypepublicandsystemidentifiersloop;
  1.5253 +                            // continue stateloop;
  1.5254 +                            case '\'':
  1.5255 +                                /*
  1.5256 +                                 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
  1.5257 +                                 * system identifier to the empty string (not
  1.5258 +                                 * missing),
  1.5259 +                                 */
  1.5260 +                                clearLongStrBuf();
  1.5261 +                                /*
  1.5262 +                                 * then switch to the DOCTYPE system identifier
  1.5263 +                                 * (single-quoted) state.
  1.5264 +                                 */
  1.5265 +                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
  1.5266 +                                continue stateloop;
  1.5267 +                            default:
  1.5268 +                                bogusDoctype();
  1.5269 +                                /*
  1.5270 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5271 +                                 * on.
  1.5272 +                                 */
  1.5273 +                                // done by bogusDoctype();
  1.5274 +                                /*
  1.5275 +                                 * Switch to the bogus DOCTYPE state.
  1.5276 +                                 */
  1.5277 +                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
  1.5278 +                                continue stateloop;
  1.5279 +                        }
  1.5280 +                    }
  1.5281 +                    // FALLTHRU DON'T REORDER
  1.5282 +                case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
  1.5283 +                    doctypesystemidentifierdoublequotedloop: for (;;) {
  1.5284 +                        if (++pos == endPos) {
  1.5285 +                            break stateloop;
  1.5286 +                        }
  1.5287 +                        c = checkChar(buf, pos);
  1.5288 +                        /*
  1.5289 +                         * Consume the next input character:
  1.5290 +                         */
  1.5291 +                        switch (c) {
  1.5292 +                            case '"':
  1.5293 +                                /*
  1.5294 +                                 * U+0022 QUOTATION MARK (") Switch to the after
  1.5295 +                                 * DOCTYPE system identifier state.
  1.5296 +                                 */
  1.5297 +                                systemIdentifier = longStrBufToString();
  1.5298 +                                state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
  1.5299 +                                continue stateloop;
  1.5300 +                            case '>':
  1.5301 +                                /*
  1.5302 +                                 * U+003E GREATER-THAN SIGN (>) Parse error.
  1.5303 +                                 */
  1.5304 +                                errGtInSystemId();
  1.5305 +                                /*
  1.5306 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5307 +                                 * on.
  1.5308 +                                 */
  1.5309 +                                forceQuirks = true;
  1.5310 +                                /*
  1.5311 +                                 * Emit that DOCTYPE token.
  1.5312 +                                 */
  1.5313 +                                systemIdentifier = longStrBufToString();
  1.5314 +                                emitDoctypeToken(pos);
  1.5315 +                                /*
  1.5316 +                                 * Switch to the data state.
  1.5317 +                                 */
  1.5318 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5319 +                                continue stateloop;
  1.5320 +                            case '\r':
  1.5321 +                                appendLongStrBufCarriageReturn();
  1.5322 +                                break stateloop;
  1.5323 +                            case '\n':
  1.5324 +                                appendLongStrBufLineFeed();
  1.5325 +                                continue;
  1.5326 +                            case '\u0000':
  1.5327 +                                c = '\uFFFD';
  1.5328 +                                // fall thru
  1.5329 +                            default:
  1.5330 +                                /*
  1.5331 +                                 * Anything else Append the current input
  1.5332 +                                 * character to the current DOCTYPE token's
  1.5333 +                                 * system identifier.
  1.5334 +                                 */
  1.5335 +                                appendLongStrBuf(c);
  1.5336 +                                /*
  1.5337 +                                 * Stay in the DOCTYPE system identifier
  1.5338 +                                 * (double-quoted) state.
  1.5339 +                                 */
  1.5340 +                                continue;
  1.5341 +                        }
  1.5342 +                    }
  1.5343 +                    // FALLTHRU DON'T REORDER
  1.5344 +                case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
  1.5345 +                    afterdoctypesystemidentifierloop: for (;;) {
  1.5346 +                        if (++pos == endPos) {
  1.5347 +                            break stateloop;
  1.5348 +                        }
  1.5349 +                        c = checkChar(buf, pos);
  1.5350 +                        /*
  1.5351 +                         * Consume the next input character:
  1.5352 +                         */
  1.5353 +                        switch (c) {
  1.5354 +                            case '\r':
  1.5355 +                                silentCarriageReturn();
  1.5356 +                                break stateloop;
  1.5357 +                            case '\n':
  1.5358 +                                silentLineFeed();
  1.5359 +                                // fall thru
  1.5360 +                            case ' ':
  1.5361 +                            case '\t':
  1.5362 +                            case '\u000C':
  1.5363 +                                /*
  1.5364 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.5365 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
  1.5366 +                                 * in the after DOCTYPE system identifier state.
  1.5367 +                                 */
  1.5368 +                                continue;
  1.5369 +                            case '>':
  1.5370 +                                /*
  1.5371 +                                 * U+003E GREATER-THAN SIGN (>) Emit the current
  1.5372 +                                 * DOCTYPE token.
  1.5373 +                                 */
  1.5374 +                                emitDoctypeToken(pos);
  1.5375 +                                /*
  1.5376 +                                 * Switch to the data state.
  1.5377 +                                 */
  1.5378 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5379 +                                continue stateloop;
  1.5380 +                            default:
  1.5381 +                                /*
  1.5382 +                                 * Switch to the bogus DOCTYPE state. (This does
  1.5383 +                                 * not set the DOCTYPE token's force-quirks flag
  1.5384 +                                 * to on.)
  1.5385 +                                 */
  1.5386 +                                bogusDoctypeWithoutQuirks();
  1.5387 +                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
  1.5388 +                                break afterdoctypesystemidentifierloop;
  1.5389 +                            // continue stateloop;
  1.5390 +                        }
  1.5391 +                    }
  1.5392 +                    // FALLTHRU DON'T REORDER
  1.5393 +                case BOGUS_DOCTYPE:
  1.5394 +                    for (;;) {
  1.5395 +                        if (reconsume) {
  1.5396 +                            reconsume = false;
  1.5397 +                        } else {
  1.5398 +                            if (++pos == endPos) {
  1.5399 +                                break stateloop;
  1.5400 +                            }
  1.5401 +                            c = checkChar(buf, pos);
  1.5402 +                        }
  1.5403 +                        /*
  1.5404 +                         * Consume the next input character:
  1.5405 +                         */
  1.5406 +                        switch (c) {
  1.5407 +                            case '>':
  1.5408 +                                /*
  1.5409 +                                 * U+003E GREATER-THAN SIGN (>) Emit that
  1.5410 +                                 * DOCTYPE token.
  1.5411 +                                 */
  1.5412 +                                emitDoctypeToken(pos);
  1.5413 +                                /*
  1.5414 +                                 * Switch to the data state.
  1.5415 +                                 */
  1.5416 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5417 +                                continue stateloop;
  1.5418 +                            case '\r':
  1.5419 +                                silentCarriageReturn();
  1.5420 +                                break stateloop;
  1.5421 +                            case '\n':
  1.5422 +                                silentLineFeed();
  1.5423 +                                // fall thru
  1.5424 +                            default:
  1.5425 +                                /*
  1.5426 +                                 * Anything else Stay in the bogus DOCTYPE
  1.5427 +                                 * state.
  1.5428 +                                 */
  1.5429 +                                continue;
  1.5430 +                        }
  1.5431 +                    }
  1.5432 +                    // XXX reorder point
  1.5433 +                case DOCTYPE_YSTEM:
  1.5434 +                    doctypeystemloop: for (;;) {
  1.5435 +                        if (++pos == endPos) {
  1.5436 +                            break stateloop;
  1.5437 +                        }
  1.5438 +                        c = checkChar(buf, pos);
  1.5439 +                        /*
  1.5440 +                         * Otherwise, if the six characters starting from the
  1.5441 +                         * current input character are an ASCII case-insensitive
  1.5442 +                         * match for the word "SYSTEM", then consume those
  1.5443 +                         * characters and switch to the before DOCTYPE system
  1.5444 +                         * identifier state.
  1.5445 +                         */
  1.5446 +                        if (index < 5) { // YSTEM.length
  1.5447 +                            char folded = c;
  1.5448 +                            if (c >= 'A' && c <= 'Z') {
  1.5449 +                                folded += 0x20;
  1.5450 +                            }
  1.5451 +                            if (folded != Tokenizer.YSTEM[index]) {
  1.5452 +                                bogusDoctype();
  1.5453 +                                reconsume = true;
  1.5454 +                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
  1.5455 +                                continue stateloop;
  1.5456 +                            }
  1.5457 +                            index++;
  1.5458 +                            continue stateloop;
  1.5459 +                        } else {
  1.5460 +                            reconsume = true;
  1.5461 +                            state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
  1.5462 +                            break doctypeystemloop;
  1.5463 +                            // continue stateloop;
  1.5464 +                        }
  1.5465 +                    }
  1.5466 +                    // FALLTHRU DON'T REORDER
  1.5467 +                case AFTER_DOCTYPE_SYSTEM_KEYWORD:
  1.5468 +                    afterdoctypesystemkeywordloop: for (;;) {
  1.5469 +                        if (reconsume) {
  1.5470 +                            reconsume = false;
  1.5471 +                        } else {
  1.5472 +                            if (++pos == endPos) {
  1.5473 +                                break stateloop;
  1.5474 +                            }
  1.5475 +                            c = checkChar(buf, pos);
  1.5476 +                        }
  1.5477 +                        /*
  1.5478 +                         * Consume the next input character:
  1.5479 +                         */
  1.5480 +                        switch (c) {
  1.5481 +                            case '\r':
  1.5482 +                                silentCarriageReturn();
  1.5483 +                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
  1.5484 +                                break stateloop;
  1.5485 +                            case '\n':
  1.5486 +                                silentLineFeed();
  1.5487 +                                // fall thru
  1.5488 +                            case ' ':
  1.5489 +                            case '\t':
  1.5490 +                            case '\u000C':
  1.5491 +                                /*
  1.5492 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.5493 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE
  1.5494 +                                 * Switch to the before DOCTYPE system
  1.5495 +                                 * identifier state.
  1.5496 +                                 */
  1.5497 +                                state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
  1.5498 +                                break afterdoctypesystemkeywordloop;
  1.5499 +                            // FALL THROUGH continue stateloop
  1.5500 +                            case '"':
  1.5501 +                                /*
  1.5502 +                                 * U+0022 QUOTATION MARK (") Parse Error.
  1.5503 +                                 */
  1.5504 +                                errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
  1.5505 +                                /*
  1.5506 +                                 * Set the DOCTYPE token's system identifier to
  1.5507 +                                 * the empty string (not missing),
  1.5508 +                                 */
  1.5509 +                                clearLongStrBuf();
  1.5510 +                                /*
  1.5511 +                                 * then switch to the DOCTYPE system identifier
  1.5512 +                                 * (double-quoted) state.
  1.5513 +                                 */
  1.5514 +                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
  1.5515 +                                continue stateloop;
  1.5516 +                            case '\'':
  1.5517 +                                /*
  1.5518 +                                 * U+0027 APOSTROPHE (') Parse Error.
  1.5519 +                                 */
  1.5520 +                                errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
  1.5521 +                                /*
  1.5522 +                                 * Set the DOCTYPE token's system identifier to
  1.5523 +                                 * the empty string (not missing),
  1.5524 +                                 */
  1.5525 +                                clearLongStrBuf();
  1.5526 +                                /*
  1.5527 +                                 * then switch to the DOCTYPE system identifier
  1.5528 +                                 * (single-quoted) state.
  1.5529 +                                 */
  1.5530 +                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
  1.5531 +                                continue stateloop;
  1.5532 +                            case '>':
  1.5533 +                                /* U+003E GREATER-THAN SIGN (>) Parse error. */
  1.5534 +                                errExpectedPublicId();
  1.5535 +                                /*
  1.5536 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5537 +                                 * on.
  1.5538 +                                 */
  1.5539 +                                forceQuirks = true;
  1.5540 +                                /*
  1.5541 +                                 * Emit that DOCTYPE token.
  1.5542 +                                 */
  1.5543 +                                emitDoctypeToken(pos);
  1.5544 +                                /*
  1.5545 +                                 * Switch to the data state.
  1.5546 +                                 */
  1.5547 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5548 +                                continue stateloop;
  1.5549 +                            default:
  1.5550 +                                bogusDoctype();
  1.5551 +                                /*
  1.5552 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5553 +                                 * on.
  1.5554 +                                 */
  1.5555 +                                // done by bogusDoctype();
  1.5556 +                                /*
  1.5557 +                                 * Switch to the bogus DOCTYPE state.
  1.5558 +                                 */
  1.5559 +                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
  1.5560 +                                continue stateloop;
  1.5561 +                        }
  1.5562 +                    }
  1.5563 +                    // FALLTHRU DON'T REORDER
  1.5564 +                case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
  1.5565 +                    beforedoctypesystemidentifierloop: for (;;) {
  1.5566 +                        if (++pos == endPos) {
  1.5567 +                            break stateloop;
  1.5568 +                        }
  1.5569 +                        c = checkChar(buf, pos);
  1.5570 +                        /*
  1.5571 +                         * Consume the next input character:
  1.5572 +                         */
  1.5573 +                        switch (c) {
  1.5574 +                            case '\r':
  1.5575 +                                silentCarriageReturn();
  1.5576 +                                break stateloop;
  1.5577 +                            case '\n':
  1.5578 +                                silentLineFeed();
  1.5579 +                                // fall thru
  1.5580 +                            case ' ':
  1.5581 +                            case '\t':
  1.5582 +                            case '\u000C':
  1.5583 +                                /*
  1.5584 +                                 * U+0009 CHARACTER TABULATION U+000A LINE FEED
  1.5585 +                                 * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay
  1.5586 +                                 * in the before DOCTYPE system identifier
  1.5587 +                                 * state.
  1.5588 +                                 */
  1.5589 +                                continue;
  1.5590 +                            case '"':
  1.5591 +                                /*
  1.5592 +                                 * U+0022 QUOTATION MARK (") Set the DOCTYPE
  1.5593 +                                 * token's system identifier to the empty string
  1.5594 +                                 * (not missing),
  1.5595 +                                 */
  1.5596 +                                clearLongStrBuf();
  1.5597 +                                /*
  1.5598 +                                 * then switch to the DOCTYPE system identifier
  1.5599 +                                 * (double-quoted) state.
  1.5600 +                                 */
  1.5601 +                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
  1.5602 +                                continue stateloop;
  1.5603 +                            case '\'':
  1.5604 +                                /*
  1.5605 +                                 * U+0027 APOSTROPHE (') Set the DOCTYPE token's
  1.5606 +                                 * system identifier to the empty string (not
  1.5607 +                                 * missing),
  1.5608 +                                 */
  1.5609 +                                clearLongStrBuf();
  1.5610 +                                /*
  1.5611 +                                 * then switch to the DOCTYPE system identifier
  1.5612 +                                 * (single-quoted) state.
  1.5613 +                                 */
  1.5614 +                                state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
  1.5615 +                                break beforedoctypesystemidentifierloop;
  1.5616 +                            // continue stateloop;
  1.5617 +                            case '>':
  1.5618 +                                /* U+003E GREATER-THAN SIGN (>) Parse error. */
  1.5619 +                                errExpectedSystemId();
  1.5620 +                                /*
  1.5621 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5622 +                                 * on.
  1.5623 +                                 */
  1.5624 +                                forceQuirks = true;
  1.5625 +                                /*
  1.5626 +                                 * Emit that DOCTYPE token.
  1.5627 +                                 */
  1.5628 +                                emitDoctypeToken(pos);
  1.5629 +                                /*
  1.5630 +                                 * Switch to the data state.
  1.5631 +                                 */
  1.5632 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5633 +                                continue stateloop;
  1.5634 +                            default:
  1.5635 +                                bogusDoctype();
  1.5636 +                                /*
  1.5637 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5638 +                                 * on.
  1.5639 +                                 */
  1.5640 +                                // done by bogusDoctype();
  1.5641 +                                /*
  1.5642 +                                 * Switch to the bogus DOCTYPE state.
  1.5643 +                                 */
  1.5644 +                                state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos);
  1.5645 +                                continue stateloop;
  1.5646 +                        }
  1.5647 +                    }
  1.5648 +                    // FALLTHRU DON'T REORDER
  1.5649 +                case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
  1.5650 +                    for (;;) {
  1.5651 +                        if (++pos == endPos) {
  1.5652 +                            break stateloop;
  1.5653 +                        }
  1.5654 +                        c = checkChar(buf, pos);
  1.5655 +                        /*
  1.5656 +                         * Consume the next input character:
  1.5657 +                         */
  1.5658 +                        switch (c) {
  1.5659 +                            case '\'':
  1.5660 +                                /*
  1.5661 +                                 * U+0027 APOSTROPHE (') Switch to the after
  1.5662 +                                 * DOCTYPE system identifier state.
  1.5663 +                                 */
  1.5664 +                                systemIdentifier = longStrBufToString();
  1.5665 +                                state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
  1.5666 +                                continue stateloop;
  1.5667 +                            case '>':
  1.5668 +                                errGtInSystemId();
  1.5669 +                                /*
  1.5670 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5671 +                                 * on.
  1.5672 +                                 */
  1.5673 +                                forceQuirks = true;
  1.5674 +                                /*
  1.5675 +                                 * Emit that DOCTYPE token.
  1.5676 +                                 */
  1.5677 +                                systemIdentifier = longStrBufToString();
  1.5678 +                                emitDoctypeToken(pos);
  1.5679 +                                /*
  1.5680 +                                 * Switch to the data state.
  1.5681 +                                 */
  1.5682 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5683 +                                continue stateloop;
  1.5684 +                            case '\r':
  1.5685 +                                appendLongStrBufCarriageReturn();
  1.5686 +                                break stateloop;
  1.5687 +                            case '\n':
  1.5688 +                                appendLongStrBufLineFeed();
  1.5689 +                                continue;
  1.5690 +                            case '\u0000':
  1.5691 +                                c = '\uFFFD';
  1.5692 +                                // fall thru
  1.5693 +                            default:
  1.5694 +                                /*
  1.5695 +                                 * Anything else Append the current input
  1.5696 +                                 * character to the current DOCTYPE token's
  1.5697 +                                 * system identifier.
  1.5698 +                                 */
  1.5699 +                                appendLongStrBuf(c);
  1.5700 +                                /*
  1.5701 +                                 * Stay in the DOCTYPE system identifier
   1.5702 +                                 * (single-quoted) state.
  1.5703 +                                 */
  1.5704 +                                continue;
  1.5705 +                        }
  1.5706 +                    }
  1.5707 +                    // XXX reorder point
  1.5708 +                case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
  1.5709 +                    for (;;) {
  1.5710 +                        if (++pos == endPos) {
  1.5711 +                            break stateloop;
  1.5712 +                        }
  1.5713 +                        c = checkChar(buf, pos);
  1.5714 +                        /*
  1.5715 +                         * Consume the next input character:
  1.5716 +                         */
  1.5717 +                        switch (c) {
  1.5718 +                            case '\'':
  1.5719 +                                /*
  1.5720 +                                 * U+0027 APOSTROPHE (') Switch to the after
  1.5721 +                                 * DOCTYPE public identifier state.
  1.5722 +                                 */
  1.5723 +                                publicIdentifier = longStrBufToString();
  1.5724 +                                state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
  1.5725 +                                continue stateloop;
  1.5726 +                            case '>':
  1.5727 +                                errGtInPublicId();
  1.5728 +                                /*
  1.5729 +                                 * Set the DOCTYPE token's force-quirks flag to
  1.5730 +                                 * on.
  1.5731 +                                 */
  1.5732 +                                forceQuirks = true;
  1.5733 +                                /*
  1.5734 +                                 * Emit that DOCTYPE token.
  1.5735 +                                 */
  1.5736 +                                publicIdentifier = longStrBufToString();
  1.5737 +                                emitDoctypeToken(pos);
  1.5738 +                                /*
  1.5739 +                                 * Switch to the data state.
  1.5740 +                                 */
  1.5741 +                                state = transition(state, Tokenizer.DATA, reconsume, pos);
  1.5742 +                                continue stateloop;
  1.5743 +                            case '\r':
  1.5744 +                                appendLongStrBufCarriageReturn();
  1.5745 +                                break stateloop;
  1.5746 +                            case '\n':
  1.5747 +                                appendLongStrBufLineFeed();
  1.5748 +                                continue;
  1.5749 +                            case '\u0000':
  1.5750 +                                c = '\uFFFD';
  1.5751 +                                // fall thru
  1.5752 +                            default:
  1.5753 +                                /*
  1.5754 +                                 * Anything else Append the current input
  1.5755 +                                 * character to the current DOCTYPE token's
  1.5756 +                                 * public identifier.
  1.5757 +                                 */
  1.5758 +                                appendLongStrBuf(c);
  1.5759 +                                /*
  1.5760 +                                 * Stay in the DOCTYPE public identifier
  1.5761 +                                 * (single-quoted) state.
  1.5762 +                                 */
  1.5763 +                                continue;
  1.5764 +                        }
  1.5765 +                    }
  1.5766 +                    // XXX reorder point
  1.5767 +                case PROCESSING_INSTRUCTION:
  1.5768 +                    processinginstructionloop: for (;;) {
  1.5769 +                        if (++pos == endPos) {
  1.5770 +                            break stateloop;
  1.5771 +                        }
  1.5772 +                        c = checkChar(buf, pos);
  1.5773 +                        switch (c) {
  1.5774 +                            case '?':
  1.5775 +                                state = transition(
  1.5776 +                                        state,
  1.5777 +                                        Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK,
  1.5778 +                                        reconsume, pos);
  1.5779 +                                break processinginstructionloop;
  1.5780 +                            // continue stateloop;
  1.5781 +                            default:
  1.5782 +                                continue;
  1.5783 +                        }
  1.5784 +                    }
  1.5785 +                case PROCESSING_INSTRUCTION_QUESTION_MARK:
  1.5786 +                    if (++pos == endPos) {
  1.5787 +                        break stateloop;
  1.5788 +                    }
  1.5789 +                    c = checkChar(buf, pos);
  1.5790 +                    switch (c) {
  1.5791 +                        case '>':
  1.5792 +                            state = transition(state, Tokenizer.DATA,
  1.5793 +                                    reconsume, pos);
  1.5794 +                            continue stateloop;
  1.5795 +                        default:
  1.5796 +                            state = transition(state,
  1.5797 +                                    Tokenizer.PROCESSING_INSTRUCTION,
  1.5798 +                                    reconsume, pos);
  1.5799 +                            continue stateloop;
  1.5800 +                    }
  1.5801 +                    // END HOTSPOT WORKAROUND
  1.5802 +            }
  1.5803 +        }
  1.5804 +        flushChars(buf, pos);
  1.5805 +        /*
  1.5806 +         * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; }
  1.5807 +         */
  1.5808 +        // Save locals
  1.5809 +        stateSave = state;
  1.5810 +        returnStateSave = returnState;
  1.5811 +        return pos;
  1.5812 +    }
  1.5813 +    
  1.5814 +    // HOTSPOT WORKAROUND INSERTION POINT
  1.5815 +    
  1.5816 +    // [NOCPP[
  1.5817 +    
   1.5818 +    protected int transition(int from, int to, boolean reconsume, int pos) throws SAXException { // State-change hook: this implementation ignores from, reconsume and pos.
   1.5819 +        return to; // The state to enter is exactly the requested target state.
   1.5820 +    }
  1.5821 +
  1.5822 +    // ]NOCPP]
  1.5823 +    
  1.5824 +    private void initDoctypeFields() {
  1.5825 +        doctypeName = "";
  1.5826 +        if (systemIdentifier != null) {
  1.5827 +            Portability.releaseString(systemIdentifier);
  1.5828 +            systemIdentifier = null;
  1.5829 +        }
  1.5830 +        if (publicIdentifier != null) {
  1.5831 +            Portability.releaseString(publicIdentifier);
  1.5832 +            publicIdentifier = null;
  1.5833 +        }
  1.5834 +        forceQuirks = false;
  1.5835 +    }
  1.5836 +
   1.5837 +    @Inline private void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() // Carriage-return variant of the double-hyphen-adjusting append.
   1.5838 +            throws SAXException {
   1.5839 +        silentCarriageReturn(); // Bookkeeping first: line counter and lastCR.
   1.5840 +        adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); // The CR is handed on as a line feed character.
   1.5841 +    }
  1.5842 +
   1.5843 +    @Inline private void adjustDoubleHyphenAndAppendToLongStrBufLineFeed() // Line-feed variant of the double-hyphen-adjusting append.
   1.5844 +            throws SAXException {
   1.5845 +        silentLineFeed(); // Bookkeeping first: line counter only.
   1.5846 +        adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); // Then hand the line feed to the adjusting append helper.
   1.5847 +    }
  1.5848 +
   1.5849 +    @Inline private void appendLongStrBufLineFeed() { // Counts a line feed and appends it to the long string buffer.
   1.5850 +        silentLineFeed(); // Line bookkeeping happens before the append.
   1.5851 +        appendLongStrBuf('\n');
   1.5852 +    }
  1.5853 +
   1.5854 +    @Inline private void appendLongStrBufCarriageReturn() { // Counts a carriage return and appends a line feed in its place.
   1.5855 +        silentCarriageReturn(); // Line bookkeeping, including lastCR, happens before the append.
   1.5856 +        appendLongStrBuf('\n'); // Note: '\n' is appended, not '\r'.
   1.5857 +    }
  1.5858 +
  1.5859 +    @Inline protected void silentCarriageReturn() {
  1.5860 +        ++line;
  1.5861 +        lastCR = true;
  1.5862 +    }
  1.5863 +
  1.5864 +    @Inline protected void silentLineFeed() {
  1.5865 +        ++line;
  1.5866 +    }
  1.5867 +
   1.5868 +    private void emitCarriageReturn(@NoLength char[] buf, int pos) // Reports a carriage return to the token handler as a line feed.
   1.5869 +            throws SAXException {
   1.5870 +        silentCarriageReturn(); // Line counter and lastCR bookkeeping.
   1.5871 +        flushChars(buf, pos); // NOTE(review): presumably flushes the pending character run up to pos - confirm in flushChars.
   1.5872 +        tokenHandler.characters(Tokenizer.LF, 0, 1); // Passes Tokenizer.LF with 0 and 1 (presumably offset and length, i.e. one character).
   1.5873 +        cstart = Integer.MAX_VALUE; // NOTE(review): sentinel value; presumably means no character run is currently open - confirm.
   1.5874 +    }
  1.5875 +
   1.5876 +    private void emitReplacementCharacter(@NoLength char[] buf, int pos) // Lets the token handler report a zero-originating replacement character.
   1.5877 +            throws SAXException {
   1.5878 +        flushChars(buf, pos); // NOTE(review): presumably flushes the pending character run up to pos - confirm in flushChars.
   1.5879 +        tokenHandler.zeroOriginatingReplacementCharacter(); // What is reported is decided by the token handler.
   1.5880 +        cstart = pos + 1; // cstart now points just past pos.
   1.5881 +    }
  1.5882 +
   1.5883 +    private void emitPlaintextReplacementCharacter(@NoLength char[] buf, int pos) // Reports REPLACEMENT_CHARACTER through the ordinary characters() callback.
   1.5884 +            throws SAXException {
   1.5885 +        flushChars(buf, pos); // NOTE(review): presumably flushes the pending character run up to pos - confirm in flushChars.
   1.5886 +        tokenHandler.characters(REPLACEMENT_CHARACTER, 0, 1); // Passes REPLACEMENT_CHARACTER with 0 and 1 (presumably offset and length).
   1.5887 +        cstart = pos + 1; // cstart now points just past pos.
   1.5888 +    }
  1.5889 +
   1.5890 +    private void setAdditionalAndRememberAmpersandLocation(char add) { // Stores add in the additional field and records a locator in ampersandLocation.
   1.5891 +        additional = add;
   1.5892 +        // [NOCPP[
   1.5893 +        ampersandLocation = new LocatorImpl(this); // A fresh LocatorImpl constructed from this tokenizer.
   1.5894 +        // ]NOCPP]
   1.5895 +    }
  1.5896 +
   1.5897 +    private void bogusDoctype() throws SAXException { // Reports a bogus DOCTYPE and turns forceQuirks on.
   1.5898 +        errBogusDoctype(); // Runs first; if it throws, forceQuirks is left unchanged.
   1.5899 +        forceQuirks = true;
   1.5900 +    }
  1.5901 +
   1.5902 +    private void bogusDoctypeWithoutQuirks() throws SAXException { // Reports a bogus DOCTYPE but turns forceQuirks off.
   1.5903 +        errBogusDoctype(); // Runs first; if it throws, forceQuirks is left unchanged.
   1.5904 +        forceQuirks = false;
   1.5905 +    }
  1.5906 +
  1.5907 +    private void emitOrAppendStrBuf(int returnState) throws SAXException {
  1.5908 +        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
  1.5909 +            appendStrBufToLongStrBuf();
  1.5910 +        } else {
  1.5911 +            emitStrBuf();
  1.5912 +        }
  1.5913 +    }
  1.5914 +
   1.5915 +    private void handleNcrValue(int returnState) throws SAXException { // Emits or appends the character(s) for the numeric character reference held in the value field.
   1.5916 +        /*
   1.5917 +         * If one or more characters match the range, then take them all and
   1.5918 +         * interpret the string of characters as a number (either hexadecimal or
   1.5919 +         * decimal as appropriate).
   1.5920 +         */
   1.5921 +        if (value <= 0xFFFF) { // Fits in a single char; see the (char) cast below.
   1.5922 +            if (value >= 0x80 && value <= 0x9f) {
   1.5923 +                /*
   1.5924 +                 * If that number is one of the numbers in the first column of
   1.5925 +                 * the following table, then this is a parse error.
   1.5926 +                 */
   1.5927 +                errNcrInC1Range();
   1.5928 +                /*
   1.5929 +                 * Find the row with that number in the first column, and return
   1.5930 +                 * a character token for the Unicode character given in the
   1.5931 +                 * second column of that row.
   1.5932 +                 */
   1.5933 +                @NoLength char[] val = NamedCharacters.WINDOWS_1252[value - 0x80]; // Table index 0 corresponds to 0x80.
   1.5934 +                emitOrAppendOne(val, returnState);
   1.5935 +                // [NOCPP[
   1.5936 +            } else if (value == 0xC
   1.5937 +                    && contentSpacePolicy != XmlViolationPolicy.ALLOW) {
   1.5938 +                if (contentSpacePolicy == XmlViolationPolicy.ALTER_INFOSET) {
   1.5939 +                    emitOrAppendOne(Tokenizer.SPACE, returnState); // Form feed is replaced by Tokenizer.SPACE.
   1.5940 +                } else if (contentSpacePolicy == XmlViolationPolicy.FATAL) {
   1.5941 +                    fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space.");
   1.5942 +                } // No else: under any other policy value this branch emits nothing.
   1.5943 +                // ]NOCPP]
   1.5944 +            } else if (value == 0x0) {
   1.5945 +                errNcrZero();
   1.5946 +                emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
   1.5947 +            } else if ((value & 0xF800) == 0xD800) { // True exactly for 0xD800..0xDFFF (surrogate code points).
   1.5948 +                errNcrSurrogate();
   1.5949 +                emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
   1.5950 +            } else {
   1.5951 +                /*
   1.5952 +                 * Otherwise, return a character token for the Unicode character
   1.5953 +                 * whose code point is that number.
   1.5954 +                 */
   1.5955 +                char ch = (char) value;
   1.5956 +                // [NOCPP[
   1.5957 +                if (value == 0x0D) {
   1.5958 +                    errNcrCr();
   1.5959 +                } else if ((value <= 0x0008) || (value == 0x000B)
   1.5960 +                        || (value >= 0x000E && value <= 0x001F)) {
   1.5961 +                    ch = errNcrControlChar(ch); // The error hook may substitute the character.
   1.5962 +                } else if (value >= 0xFDD0 && value <= 0xFDEF) {
   1.5963 +                    errNcrUnassigned();
   1.5964 +                } else if ((value & 0xFFFE) == 0xFFFE) { // Within this branch only 0xFFFE and 0xFFFF match.
   1.5965 +                    ch = errNcrNonCharacter(ch); // The error hook may substitute the character.
   1.5966 +                } else if (value >= 0x007F && value <= 0x009F) { // Only 0x7F reaches here; 0x80..0x9F were handled above.
   1.5967 +                    errNcrControlChar();
   1.5968 +                } else {
   1.5969 +                    maybeWarnPrivateUse(ch);
   1.5970 +                }
   1.5971 +                // ]NOCPP]
   1.5972 +                bmpChar[0] = ch; // bmpChar is a field reused as a one-element output buffer.
   1.5973 +                emitOrAppendOne(bmpChar, returnState);
   1.5974 +            }
   1.5975 +        } else if (value <= 0x10FFFF) { // Above 0xFFFF: written out as two chars.
   1.5976 +            // [NOCPP[
   1.5977 +            maybeWarnPrivateUseAstral();
   1.5978 +            if ((value & 0xFFFE) == 0xFFFE) { // Low 16 bits are 0xFFFE or 0xFFFF.
   1.5979 +                errAstralNonCharacter(value);
   1.5980 +            }
   1.5981 +            // ]NOCPP]
   1.5982 +            astralChar[0] = (char) (Tokenizer.LEAD_OFFSET + (value >> 10)); // First char of the pair; NOTE(review): presumably the lead surrogate - confirm LEAD_OFFSET.
   1.5983 +            astralChar[1] = (char) (0xDC00 + (value & 0x3FF)); // Second char: 0xDC00 plus the low ten bits.
   1.5984 +            emitOrAppendTwo(astralChar, returnState);
   1.5985 +        } else { // value is greater than 0x10FFFF.
   1.5986 +            errNcrOutOfRange();
   1.5987 +            emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState);
   1.5988 +        }
   1.5989 +    }
  1.5990 +
  1.5991 +    public void eof() throws SAXException {
  1.5992 +        int state = stateSave;
  1.5993 +        int returnState = returnStateSave;
  1.5994 +
  1.5995 +        eofloop: for (;;) {
  1.5996 +            switch (state) {
  1.5997 +                case SCRIPT_DATA_LESS_THAN_SIGN:
  1.5998 +                case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
  1.5999 +                    /*
  1.6000 +                     * Otherwise, emit a U+003C LESS-THAN SIGN character token
  1.6001 +                     */
  1.6002 +                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
  1.6003 +                    /*
  1.6004 +                     * and reconsume the current input character in the data
  1.6005 +                     * state.
  1.6006 +                     */
  1.6007 +                    break eofloop;
  1.6008 +                case TAG_OPEN:
  1.6009 +                    /*
  1.6010 +                     * The behavior of this state depends on the content model
  1.6011 +                     * flag.
  1.6012 +                     */
  1.6013 +                    /*
  1.6014 +                     * Anything else Parse error.
  1.6015 +                     */
  1.6016 +                    errEofAfterLt();
  1.6017 +                    /*
  1.6018 +                     * Emit a U+003C LESS-THAN SIGN character token
  1.6019 +                     */
  1.6020 +                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
  1.6021 +                    /*
  1.6022 +                     * and reconsume the current input character in the data
  1.6023 +                     * state.
  1.6024 +                     */
  1.6025 +                    break eofloop;
  1.6026 +                case RAWTEXT_RCDATA_LESS_THAN_SIGN:
  1.6027 +                    /*
  1.6028 +                     * Emit a U+003C LESS-THAN SIGN character token
  1.6029 +                     */
  1.6030 +                    tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
  1.6031 +                    /*
  1.6032 +                     * and reconsume the current input character in the RCDATA
  1.6033 +                     * state.
  1.6034 +                     */
  1.6035 +                    break eofloop;
  1.6036 +                case NON_DATA_END_TAG_NAME:
  1.6037 +                    /*
  1.6038 +                     * Emit a U+003C LESS-THAN SIGN character token, a U+002F
  1.6039 +                     * SOLIDUS character token,
  1.6040 +                     */
  1.6041 +                    tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
  1.6042 +                    /*
  1.6043 +                     * a character token for each of the characters in the
  1.6044 +                     * temporary buffer (in the order they were added to the
  1.6045 +                     * buffer),
  1.6046 +                     */
  1.6047 +                    emitStrBuf();
  1.6048 +                    /*
  1.6049 +                     * and reconsume the current input character in the RCDATA
  1.6050 +                     * state.
  1.6051 +                     */
  1.6052 +                    break eofloop;
  1.6053 +                case CLOSE_TAG_OPEN:
  1.6054 +                    /* EOF Parse error. */
  1.6055 +                    errEofAfterLt();
  1.6056 +                    /*
  1.6057 +                     * Emit a U+003C LESS-THAN SIGN character token and a U+002F
  1.6058 +                     * SOLIDUS character token.
  1.6059 +                     */
  1.6060 +                    tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2);
  1.6061 +                    /*
  1.6062 +                     * Reconsume the EOF character in the data state.
  1.6063 +                     */
  1.6064 +                    break eofloop;
  1.6065 +                case TAG_NAME:
  1.6066 +                    /*
  1.6067 +                     * EOF Parse error.
  1.6068 +                     */
  1.6069 +                    errEofInTagName();
  1.6070 +                    /*
  1.6071 +                     * Reconsume the EOF character in the data state.
  1.6072 +                     */
  1.6073 +                    break eofloop;
  1.6074 +                case BEFORE_ATTRIBUTE_NAME:
  1.6075 +                case AFTER_ATTRIBUTE_VALUE_QUOTED:
  1.6076 +                case SELF_CLOSING_START_TAG:
  1.6077 +                    /* EOF Parse error. */
  1.6078 +                    errEofWithoutGt();
  1.6079 +                    /*
  1.6080 +                     * Reconsume the EOF character in the data state.
  1.6081 +                     */
  1.6082 +                    break eofloop;
  1.6083 +                case ATTRIBUTE_NAME:
  1.6084 +                    /*
  1.6085 +                     * EOF Parse error.
  1.6086 +                     */
  1.6087 +                    errEofInAttributeName();
  1.6088 +                    /*
  1.6089 +                     * Reconsume the EOF character in the data state.
  1.6090 +                     */
  1.6091 +                    break eofloop;
  1.6092 +                case AFTER_ATTRIBUTE_NAME:
  1.6093 +                case BEFORE_ATTRIBUTE_VALUE:
  1.6094 +                    /* EOF Parse error. */
  1.6095 +                    errEofWithoutGt();
  1.6096 +                    /*
  1.6097 +                     * Reconsume the EOF character in the data state.
  1.6098 +                     */
  1.6099 +                    break eofloop;
  1.6100 +                case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
  1.6101 +                case ATTRIBUTE_VALUE_SINGLE_QUOTED:
  1.6102 +                case ATTRIBUTE_VALUE_UNQUOTED:
  1.6103 +                    /* EOF Parse error. */
  1.6104 +                    errEofInAttributeValue();
  1.6105 +                    /*
  1.6106 +                     * Reconsume the EOF character in the data state.
  1.6107 +                     */
  1.6108 +                    break eofloop;
  1.6109 +                case BOGUS_COMMENT:
  1.6110 +                    emitComment(0, 0);
  1.6111 +                    break eofloop;
  1.6112 +                case BOGUS_COMMENT_HYPHEN:
  1.6113 +                    // [NOCPP[
  1.6114 +                    maybeAppendSpaceToBogusComment();
  1.6115 +                    // ]NOCPP]
  1.6116 +                    emitComment(0, 0);
  1.6117 +                    break eofloop;
  1.6118 +                case MARKUP_DECLARATION_OPEN:
  1.6119 +                    errBogusComment();
  1.6120 +                    clearLongStrBuf();
  1.6121 +                    emitComment(0, 0);
  1.6122 +                    break eofloop;
  1.6123 +                case MARKUP_DECLARATION_HYPHEN:
  1.6124 +                    errBogusComment();
  1.6125 +                    emitComment(0, 0);
  1.6126 +                    break eofloop;
  1.6127 +                case MARKUP_DECLARATION_OCTYPE:
  1.6128 +                    if (index < 6) {
  1.6129 +                        errBogusComment();
  1.6130 +                        emitComment(0, 0);
  1.6131 +                    } else {
  1.6132 +                        /* EOF Parse error. */
  1.6133 +                        errEofInDoctype();
  1.6134 +                        /*
  1.6135 +                         * Create a new DOCTYPE token. Set its force-quirks flag
  1.6136 +                         * to on.
  1.6137 +                         */
  1.6138 +                        doctypeName = "";
  1.6139 +                        if (systemIdentifier != null) {
  1.6140 +                            Portability.releaseString(systemIdentifier);
  1.6141 +                            systemIdentifier = null;
  1.6142 +                        }
  1.6143 +                        if (publicIdentifier != null) {
  1.6144 +                            Portability.releaseString(publicIdentifier);
  1.6145 +                            publicIdentifier = null;
  1.6146 +                        }
  1.6147 +                        forceQuirks = true;
  1.6148 +                        /*
  1.6149 +                         * Emit the token.
  1.6150 +                         */
  1.6151 +                        emitDoctypeToken(0);
  1.6152 +                        /*
  1.6153 +                         * Reconsume the EOF character in the data state.
  1.6154 +                         */
  1.6155 +                        break eofloop;
  1.6156 +                    }
  1.6157 +                    break eofloop;
  1.6158 +                case COMMENT_START:
  1.6159 +                case COMMENT:
  1.6160 +                    /*
  1.6161 +                     * EOF Parse error.
  1.6162 +                     */
  1.6163 +                    errEofInComment();
  1.6164 +                    /* Emit the comment token. */
  1.6165 +                    emitComment(0, 0);
  1.6166 +                    /*
  1.6167 +                     * Reconsume the EOF character in the data state.
  1.6168 +                     */
  1.6169 +                    break eofloop;
  1.6170 +                case COMMENT_END:
  1.6171 +                    errEofInComment();
  1.6172 +                    /* Emit the comment token. */
  1.6173 +                    emitComment(2, 0);
  1.6174 +                    /*
  1.6175 +                     * Reconsume the EOF character in the data state.
  1.6176 +                     */
  1.6177 +                    break eofloop;
  1.6178 +                case COMMENT_END_DASH:
  1.6179 +                case COMMENT_START_DASH:
  1.6180 +                    errEofInComment();
  1.6181 +                    /* Emit the comment token. */
  1.6182 +                    emitComment(1, 0);
  1.6183 +                    /*
  1.6184 +                     * Reconsume the EOF character in the data state.
  1.6185 +                     */
  1.6186 +                    break eofloop;
  1.6187 +                case COMMENT_END_BANG:
  1.6188 +                    errEofInComment();
  1.6189 +                    /* Emit the comment token. */
  1.6190 +                    emitComment(3, 0);
  1.6191 +                    /*
  1.6192 +                     * Reconsume the EOF character in the data state.
  1.6193 +                     */
  1.6194 +                    break eofloop;
  1.6195 +                case DOCTYPE:
  1.6196 +                case BEFORE_DOCTYPE_NAME:
  1.6197 +                    errEofInDoctype();
  1.6198 +                    /*
  1.6199 +                     * Create a new DOCTYPE token. Set its force-quirks flag to
  1.6200 +                     * on.
  1.6201 +                     */
  1.6202 +                    forceQuirks = true;
  1.6203 +                    /*
  1.6204 +                     * Emit the token.
  1.6205 +                     */
  1.6206 +                    emitDoctypeToken(0);
  1.6207 +                    /*
  1.6208 +                     * Reconsume the EOF character in the data state.
  1.6209 +                     */
  1.6210 +                    break eofloop;
  1.6211 +                case DOCTYPE_NAME:
  1.6212 +                    errEofInDoctype();
  1.6213 +                    strBufToDoctypeName();
  1.6214 +                    /*
  1.6215 +                     * Set the DOCTYPE token's force-quirks flag to on.
  1.6216 +                     */
  1.6217 +                    forceQuirks = true;
  1.6218 +                    /*
  1.6219 +                     * Emit that DOCTYPE token.
  1.6220 +                     */
  1.6221 +                    emitDoctypeToken(0);
  1.6222 +                    /*
  1.6223 +                     * Reconsume the EOF character in the data state.
  1.6224 +                     */
  1.6225 +                    break eofloop;
  1.6226 +                case DOCTYPE_UBLIC:
  1.6227 +                case DOCTYPE_YSTEM:
  1.6228 +                case AFTER_DOCTYPE_NAME:
  1.6229 +                case AFTER_DOCTYPE_PUBLIC_KEYWORD:
  1.6230 +                case AFTER_DOCTYPE_SYSTEM_KEYWORD:
  1.6231 +                case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
  1.6232 +                    errEofInDoctype();
  1.6233 +                    /*
  1.6234 +                     * Set the DOCTYPE token's force-quirks flag to on.
  1.6235 +                     */
  1.6236 +                    forceQuirks = true;
  1.6237 +                    /*
  1.6238 +                     * Emit that DOCTYPE token.
  1.6239 +                     */
  1.6240 +                    emitDoctypeToken(0);
  1.6241 +                    /*
  1.6242 +                     * Reconsume the EOF character in the data state.
  1.6243 +                     */
  1.6244 +                    break eofloop;
  1.6245 +                case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
  1.6246 +                case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
  1.6247 +                    /* EOF Parse error. */
  1.6248 +                    errEofInPublicId();
  1.6249 +                    /*
  1.6250 +                     * Set the DOCTYPE token's force-quirks flag to on.
  1.6251 +                     */
  1.6252 +                    forceQuirks = true;
  1.6253 +                    /*
  1.6254 +                     * Emit that DOCTYPE token.
  1.6255 +                     */
  1.6256 +                    publicIdentifier = longStrBufToString();
  1.6257 +                    emitDoctypeToken(0);
  1.6258 +                    /*
  1.6259 +                     * Reconsume the EOF character in the data state.
  1.6260 +                     */
  1.6261 +                    break eofloop;
  1.6262 +                case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
  1.6263 +                case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
  1.6264 +                case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
  1.6265 +                    errEofInDoctype();
  1.6266 +                    /*
  1.6267 +                     * Set the DOCTYPE token's force-quirks flag to on.
  1.6268 +                     */
  1.6269 +                    forceQuirks = true;
  1.6270 +                    /*
  1.6271 +                     * Emit that DOCTYPE token.
  1.6272 +                     */
  1.6273 +                    emitDoctypeToken(0);
  1.6274 +                    /*
  1.6275 +                     * Reconsume the EOF character in the data state.
  1.6276 +                     */
  1.6277 +                    break eofloop;
  1.6278 +                case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
  1.6279 +                case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
  1.6280 +                    /* EOF Parse error. */
  1.6281 +                    errEofInSystemId();
  1.6282 +                    /*
  1.6283 +                     * Set the DOCTYPE token's force-quirks flag to on.
  1.6284 +                     */
  1.6285 +                    forceQuirks = true;
  1.6286 +                    /*
  1.6287 +                     * Emit that DOCTYPE token.
  1.6288 +                     */
  1.6289 +                    systemIdentifier = longStrBufToString();
  1.6290 +                    emitDoctypeToken(0);
  1.6291 +                    /*
  1.6292 +                     * Reconsume the EOF character in the data state.
  1.6293 +                     */
  1.6294 +                    break eofloop;
  1.6295 +                case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
  1.6296 +                    errEofInDoctype();
  1.6297 +                    /*
  1.6298 +                     * Set the DOCTYPE token's force-quirks flag to on.
  1.6299 +                     */
  1.6300 +                    forceQuirks = true;
  1.6301 +                    /*
  1.6302 +                     * Emit that DOCTYPE token.
  1.6303 +                     */
  1.6304 +                    emitDoctypeToken(0);
  1.6305 +                    /*
  1.6306 +                     * Reconsume the EOF character in the data state.
  1.6307 +                     */
  1.6308 +                    break eofloop;
  1.6309 +                case BOGUS_DOCTYPE:
  1.6310 +                    /*
  1.6311 +                     * Emit that DOCTYPE token.
  1.6312 +                     */
  1.6313 +                    emitDoctypeToken(0);
  1.6314 +                    /*
  1.6315 +                     * Reconsume the EOF character in the data state.
  1.6316 +                     */
  1.6317 +                    break eofloop;
  1.6318 +                case CONSUME_CHARACTER_REFERENCE:
  1.6319 +                    /*
  1.6320 +                     * Unlike the definition in the spec, this state does not
  1.6321 +                     * return a value and never requires the caller to
  1.6322 +                     * backtrack. This state takes care of emitting characters
  1.6323 +                     * or appending to the current attribute value. It also
  1.6324 +                     * takes care of that in the case when consuming the entity
  1.6325 +                     * fails.
  1.6326 +                     */
  1.6327 +                    /*
  1.6328 +                     * This section defines how to consume an entity. This
  1.6329 +                     * definition is used when parsing entities in text and in
  1.6330 +                     * attributes.
  1.6331 +                     * 
  1.6332 +                     * The behavior depends on the identity of the next
  1.6333 +                     * character (the one immediately after the U+0026 AMPERSAND
  1.6334 +                     * character):
  1.6335 +                     */
  1.6336 +
  1.6337 +                    emitOrAppendStrBuf(returnState);
  1.6338 +                    state = returnState;
  1.6339 +                    continue;
  1.6340 +                case CHARACTER_REFERENCE_HILO_LOOKUP:
  1.6341 +                    errNoNamedCharacterMatch();
  1.6342 +                    emitOrAppendStrBuf(returnState);
  1.6343 +                    state = returnState;
  1.6344 +                    continue;
  1.6345 +                case CHARACTER_REFERENCE_TAIL:
  1.6346 +                    outer: for (;;) {
  1.6347 +                        char c = '\u0000';
  1.6348 +                        entCol++;
  1.6349 +                        /*
  1.6350 +                         * Consume the maximum number of characters possible,
  1.6351 +                         * with the consumed characters matching one of the
  1.6352 +                         * identifiers in the first column of the named
  1.6353 +                         * character references table (in a case-sensitive
  1.6354 +                         * manner).
  1.6355 +                         */
  1.6356 +                        hiloop: for (;;) {
  1.6357 +                            if (hi == -1) {
  1.6358 +                                break hiloop;
  1.6359 +                            }
  1.6360 +                            if (entCol == NamedCharacters.NAMES[hi].length()) {
  1.6361 +                                break hiloop;
  1.6362 +                            }
  1.6363 +                            if (entCol > NamedCharacters.NAMES[hi].length()) {
  1.6364 +                                break outer;
  1.6365 +                            } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) {
  1.6366 +                                hi--;
  1.6367 +                            } else {
  1.6368 +                                break hiloop;
  1.6369 +                            }
  1.6370 +                        }
  1.6371 +
  1.6372 +                        loloop: for (;;) {
  1.6373 +                            if (hi < lo) {
  1.6374 +                                break outer;
  1.6375 +                            }
  1.6376 +                            if (entCol == NamedCharacters.NAMES[lo].length()) {
  1.6377 +                                candidate = lo;
  1.6378 +                                strBufMark = strBufLen;
  1.6379 +                                lo++;
  1.6380 +                            } else if (entCol > NamedCharacters.NAMES[lo].length()) {
  1.6381 +                                break outer;
  1.6382 +                            } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
  1.6383 +                                lo++;
  1.6384 +                            } else {
  1.6385 +                                break loloop;
  1.6386 +                            }
  1.6387 +                        }
  1.6388 +                        if (hi < lo) {
  1.6389 +                            break outer;
  1.6390 +                        }
  1.6391 +                        continue;
  1.6392 +                    }
  1.6393 +
  1.6394 +                    if (candidate == -1) {
  1.6395 +                        /*
  1.6396 +                         * If no match can be made, then this is a parse error.
  1.6397 +                         */
  1.6398 +                        errNoNamedCharacterMatch();
  1.6399 +                        emitOrAppendStrBuf(returnState);
  1.6400 +                        state = returnState;
  1.6401 +                        continue eofloop;
  1.6402 +                    } else {
  1.6403 +                        @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
  1.6404 +                        if (candidateName.length() == 0
  1.6405 +                                || candidateName.charAt(candidateName.length() - 1) != ';') {
  1.6406 +                            /*
  1.6407 +                             * If the last character matched is not a U+003B
  1.6408 +                             * SEMICOLON (;), there is a parse error.
  1.6409 +                             */
  1.6410 +                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
  1.6411 +                                /*
  1.6412 +                                 * If the entity is being consumed as part of an
  1.6413 +                                 * attribute, and the last character matched is
  1.6414 +                                 * not a U+003B SEMICOLON (;),
  1.6415 +                                 */
  1.6416 +                                char ch;
  1.6417 +                                if (strBufMark == strBufLen) {
  1.6418 +                                    ch = '\u0000';
  1.6419 +                                } else {
  1.6420 +                                    ch = strBuf[strBufMark];
  1.6421 +                                }
  1.6422 +                                if ((ch >= '0' && ch <= '9')
  1.6423 +                                        || (ch >= 'A' && ch <= 'Z')
  1.6424 +                                        || (ch >= 'a' && ch <= 'z')) {
  1.6425 +                                    /*
  1.6426 +                                     * and the next character is in the range
  1.6427 +                                     * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
  1.6428 +                                     * U+0041 LATIN CAPITAL LETTER A to U+005A
  1.6429 +                                     * LATIN CAPITAL LETTER Z, or U+0061 LATIN
  1.6430 +                                     * SMALL LETTER A to U+007A LATIN SMALL
  1.6431 +                                     * LETTER Z, then, for historical reasons,
  1.6432 +                                     * all the characters that were matched
  1.6433 +                                     * after the U+0026 AMPERSAND (&) must be
  1.6434 +                                     * unconsumed, and nothing is returned.
  1.6435 +                                     */
  1.6436 +                                    errNoNamedCharacterMatch();
  1.6437 +                                    appendStrBufToLongStrBuf();
  1.6438 +                                    state = returnState;
  1.6439 +                                    continue eofloop;
  1.6440 +                                }
  1.6441 +                            }
  1.6442 +                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
  1.6443 +                                errUnescapedAmpersandInterpretedAsCharacterReference();
  1.6444 +                            } else {
  1.6445 +                                errNotSemicolonTerminated();
  1.6446 +                            }
  1.6447 +                        }
  1.6448 +
  1.6449 +                        /*
  1.6450 +                         * Otherwise, return a character token for the character
  1.6451 +                         * corresponding to the entity name (as given by the
  1.6452 +                         * second column of the named character references
  1.6453 +                         * table).
  1.6454 +                         */
  1.6455 +                        @Const @NoLength char[] val = NamedCharacters.VALUES[candidate];
  1.6456 +                        if (
  1.6457 +                        // [NOCPP[
  1.6458 +                        val.length == 1
  1.6459 +                        // ]NOCPP]
  1.6460 +                        // CPPONLY: val[1] == 0
  1.6461 +                        ) {
  1.6462 +                            emitOrAppendOne(val, returnState);
  1.6463 +                        } else {
  1.6464 +                            emitOrAppendTwo(val, returnState);
  1.6465 +                        }
  1.6466 +                        // this is so complicated!
  1.6467 +                        if (strBufMark < strBufLen) {
  1.6468 +                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
  1.6469 +                                for (int i = strBufMark; i < strBufLen; i++) {
  1.6470 +                                    appendLongStrBuf(strBuf[i]);
  1.6471 +                                }
  1.6472 +                            } else {
  1.6473 +                                tokenHandler.characters(strBuf, strBufMark,
  1.6474 +                                        strBufLen - strBufMark);
  1.6475 +                            }
  1.6476 +                        }
  1.6477 +                        state = returnState;
  1.6478 +                        continue eofloop;
  1.6479 +                        /*
  1.6480 +                         * If the markup contains I'm &notit; I tell you, the
  1.6481 +                         * entity is parsed as "not", as in, I'm ¬it; I tell
  1.6482 +                         * you. But if the markup was I'm &notin; I tell you,
  1.6483 +                         * the entity would be parsed as "notin;", resulting in
  1.6484 +                         * I'm ∉ I tell you.
  1.6485 +                         */
  1.6486 +                    }
  1.6487 +                case CONSUME_NCR:
  1.6488 +                case DECIMAL_NRC_LOOP:
  1.6489 +                case HEX_NCR_LOOP:
  1.6490 +                    /*
  1.6491 +                     * If no characters match the range, then don't consume any
  1.6492 +                     * characters (and unconsume the U+0023 NUMBER SIGN
  1.6493 +                     * character and, if appropriate, the X character). This is
  1.6494 +                     * a parse error; nothing is returned.
  1.6495 +                     * 
  1.6496 +                     * Otherwise, if the next character is a U+003B SEMICOLON,
  1.6497 +                     * consume that too. If it isn't, there is a parse error.
  1.6498 +                     */
  1.6499 +                    if (!seenDigits) {
  1.6500 +                        errNoDigitsInNCR();
  1.6501 +                        emitOrAppendStrBuf(returnState);
  1.6502 +                        state = returnState;
  1.6503 +                        continue;
  1.6504 +                    } else {
  1.6505 +                        errCharRefLacksSemicolon();
  1.6506 +                    }
  1.6507 +                    // WARNING previous state sets reconsume
  1.6508 +                    handleNcrValue(returnState);
  1.6509 +                    state = returnState;
  1.6510 +                    continue;
  1.6511 +                case CDATA_RSQB:
  1.6512 +                    tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);
  1.6513 +                    break eofloop;
  1.6514 +                case CDATA_RSQB_RSQB:
  1.6515 +                    tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
  1.6516 +                    break eofloop;
  1.6517 +                case DATA:
  1.6518 +                default:
  1.6519 +                    break eofloop;
  1.6520 +            }
  1.6521 +        }
  1.6522 +        // case DATA:
  1.6523 +        /*
  1.6524 +         * EOF Emit an end-of-file token.
  1.6525 +         */
  1.6526 +        tokenHandler.eof();
  1.6527 +        return;
  1.6528 +    }
  1.6529 +
  1.6530 +    private void emitDoctypeToken(int pos) throws SAXException {
  1.6531 +        cstart = pos + 1;
  1.6532 +        tokenHandler.doctype(doctypeName, publicIdentifier, systemIdentifier,
  1.6533 +                forceQuirks);
  1.6534 +        // It is OK and sufficient to release these here, since
  1.6535 +        // there's no way out of the doctype states than through paths
  1.6536 +        // that call this method.
  1.6537 +        doctypeName = null;
  1.6538 +        Portability.releaseString(publicIdentifier);
  1.6539 +        publicIdentifier = null;
  1.6540 +        Portability.releaseString(systemIdentifier);
  1.6541 +        systemIdentifier = null;
  1.6542 +    }
  1.6543 +
  1.6544 +    @Inline protected char checkChar(@NoLength char[] buf, int pos)
  1.6545 +            throws SAXException {
  1.6546 +        return buf[pos];
  1.6547 +    }
  1.6548 +
  1.6549 +    public boolean internalEncodingDeclaration(String internalCharset)
  1.6550 +            throws SAXException {
  1.6551 +        if (encodingDeclarationHandler != null) {
  1.6552 +            return encodingDeclarationHandler.internalEncodingDeclaration(internalCharset);
  1.6553 +        }
  1.6554 +        return false;
  1.6555 +    }
  1.6556 +
  1.6557 +    /**
  1.6558 +     * @param val
  1.6559 +     * @throws SAXException
  1.6560 +     */
  1.6561 +    private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
  1.6562 +            throws SAXException {
  1.6563 +        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
  1.6564 +            appendLongStrBuf(val[0]);
  1.6565 +            appendLongStrBuf(val[1]);
  1.6566 +        } else {
  1.6567 +            tokenHandler.characters(val, 0, 2);
  1.6568 +        }
  1.6569 +    }
  1.6570 +
  1.6571 +    private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
  1.6572 +            throws SAXException {
  1.6573 +        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
  1.6574 +            appendLongStrBuf(val[0]);
  1.6575 +        } else {
  1.6576 +            tokenHandler.characters(val, 0, 1);
  1.6577 +        }
  1.6578 +    }
  1.6579 +
  1.6580 +    public void end() throws SAXException {
  1.6581 +        strBuf = null;
  1.6582 +        longStrBuf = null;
  1.6583 +        doctypeName = null;
  1.6584 +        if (systemIdentifier != null) {
  1.6585 +            Portability.releaseString(systemIdentifier);
  1.6586 +            systemIdentifier = null;
  1.6587 +        }
  1.6588 +        if (publicIdentifier != null) {
  1.6589 +            Portability.releaseString(publicIdentifier);
  1.6590 +            publicIdentifier = null;
  1.6591 +        }
  1.6592 +        if (tagName != null) {
  1.6593 +            tagName.release();
  1.6594 +            tagName = null;
  1.6595 +        }
  1.6596 +        if (attributeName != null) {
  1.6597 +            attributeName.release();
  1.6598 +            attributeName = null;
  1.6599 +        }
  1.6600 +        tokenHandler.endTokenization();
  1.6601 +        if (attributes != null) {
  1.6602 +            // [NOCPP[
  1.6603 +            attributes = null;
  1.6604 +            // ]NOCPP]
  1.6605 +            // CPPONLY: attributes.clear(mappingLangToXmlLang);
  1.6606 +        }
  1.6607 +    }
  1.6608 +
  1.6609 +    public void requestSuspension() {
  1.6610 +        shouldSuspend = true;
  1.6611 +    }
  1.6612 +
  1.6613 +    // [NOCPP[
  1.6614 +    
  1.6615 +    public void becomeConfident() {
  1.6616 +        confident = true;
  1.6617 +    }
  1.6618 +
  1.6619 +    /**
  1.6620 +     * Returns the nextCharOnNewLine.
  1.6621 +     * 
  1.6622 +     * @return the nextCharOnNewLine
  1.6623 +     */
  1.6624 +    public boolean isNextCharOnNewLine() {
  1.6625 +        return false;
  1.6626 +    }
  1.6627 +
  1.6628 +    public boolean isPrevCR() {
  1.6629 +        return lastCR;
  1.6630 +    }
  1.6631 +
  1.6632 +    /**
  1.6633 +     * Returns the line.
  1.6634 +     * 
  1.6635 +     * @return the line
  1.6636 +     */
  1.6637 +    public int getLine() {
  1.6638 +        return -1;
  1.6639 +    }
  1.6640 +
  1.6641 +    /**
  1.6642 +     * Returns the col.
  1.6643 +     * 
  1.6644 +     * @return the col
  1.6645 +     */
  1.6646 +    public int getCol() {
  1.6647 +        return -1;
  1.6648 +    }
  1.6649 +
  1.6650 +    // ]NOCPP]
  1.6651 +    
  1.6652 +    public boolean isInDataState() {
  1.6653 +        return (stateSave == DATA);
  1.6654 +    }
  1.6655 +
  1.6656 +    public void resetToDataState() {
  1.6657 +        strBufLen = 0;
  1.6658 +        longStrBufLen = 0;
  1.6659 +        stateSave = Tokenizer.DATA;
  1.6660 +        // line = 1; XXX line numbers
  1.6661 +        lastCR = false;
  1.6662 +        index = 0;
  1.6663 +        forceQuirks = false;
  1.6664 +        additional = '\u0000';
  1.6665 +        entCol = -1;
  1.6666 +        firstCharKey = -1;
  1.6667 +        lo = 0;
  1.6668 +        hi = 0; // will always be overwritten before use anyway
  1.6669 +        candidate = -1;
  1.6670 +        strBufMark = 0;
  1.6671 +        prevValue = -1;
  1.6672 +        value = 0;
  1.6673 +        seenDigits = false;
  1.6674 +        endTag = false;
  1.6675 +        shouldSuspend = false;
  1.6676 +        initDoctypeFields();
  1.6677 +        if (tagName != null) {
  1.6678 +            tagName.release();
  1.6679 +            tagName = null;
  1.6680 +        }
  1.6681 +        if (attributeName != null) {
  1.6682 +            attributeName.release();
  1.6683 +            attributeName = null;
  1.6684 +        }
  1.6685 +        if (newAttributesEachTime) {
  1.6686 +            if (attributes != null) {
  1.6687 +                Portability.delete(attributes);
  1.6688 +                attributes = null;
  1.6689 +            }
  1.6690 +        }
  1.6691 +    }
  1.6692 +
  1.6693 +    public void loadState(Tokenizer other) throws SAXException {
  1.6694 +        strBufLen = other.strBufLen;
  1.6695 +        if (strBufLen > strBuf.length) {
  1.6696 +            strBuf = new char[strBufLen];
  1.6697 +        }
  1.6698 +        System.arraycopy(other.strBuf, 0, strBuf, 0, strBufLen);
  1.6699 +
  1.6700 +        longStrBufLen = other.longStrBufLen;
  1.6701 +        if (longStrBufLen > longStrBuf.length) {
  1.6702 +            longStrBuf = new char[longStrBufLen];
  1.6703 +        }
  1.6704 +        System.arraycopy(other.longStrBuf, 0, longStrBuf, 0, longStrBufLen);
  1.6705 +
  1.6706 +        stateSave = other.stateSave;
  1.6707 +        returnStateSave = other.returnStateSave;
  1.6708 +        endTagExpectation = other.endTagExpectation;
  1.6709 +        endTagExpectationAsArray = other.endTagExpectationAsArray;
  1.6710 +        // line = 1; XXX line numbers
  1.6711 +        lastCR = other.lastCR;
  1.6712 +        index = other.index;
  1.6713 +        forceQuirks = other.forceQuirks;
  1.6714 +        additional = other.additional;
  1.6715 +        entCol = other.entCol;
  1.6716 +        firstCharKey = other.firstCharKey;
  1.6717 +        lo = other.lo;
  1.6718 +        hi = other.hi;
  1.6719 +        candidate = other.candidate;
  1.6720 +        strBufMark = other.strBufMark;
  1.6721 +        prevValue = other.prevValue;
  1.6722 +        value = other.value;
  1.6723 +        seenDigits = other.seenDigits;
  1.6724 +        endTag = other.endTag;
  1.6725 +        shouldSuspend = false;
  1.6726 +
  1.6727 +        if (other.doctypeName == null) {
  1.6728 +            doctypeName = null;
  1.6729 +        } else {
  1.6730 +            doctypeName = Portability.newLocalFromLocal(other.doctypeName,
  1.6731 +                    interner);
  1.6732 +        }
  1.6733 +
  1.6734 +        Portability.releaseString(systemIdentifier);
  1.6735 +        if (other.systemIdentifier == null) {
  1.6736 +            systemIdentifier = null;
  1.6737 +        } else {
  1.6738 +            systemIdentifier = Portability.newStringFromString(other.systemIdentifier);
  1.6739 +        }
  1.6740 +
  1.6741 +        Portability.releaseString(publicIdentifier);
  1.6742 +        if (other.publicIdentifier == null) {
  1.6743 +            publicIdentifier = null;
  1.6744 +        } else {
  1.6745 +            publicIdentifier = Portability.newStringFromString(other.publicIdentifier);
  1.6746 +        }
  1.6747 +
  1.6748 +        if (tagName != null) {
  1.6749 +            tagName.release();
  1.6750 +        }
  1.6751 +        if (other.tagName == null) {
  1.6752 +            tagName = null;
  1.6753 +        } else {
  1.6754 +            tagName = other.tagName.cloneElementName(interner);
  1.6755 +        }
  1.6756 +
  1.6757 +        if (attributeName != null) {
  1.6758 +            attributeName.release();
  1.6759 +        }
  1.6760 +        if (other.attributeName == null) {
  1.6761 +            attributeName = null;
  1.6762 +        } else {
  1.6763 +            attributeName = other.attributeName.cloneAttributeName(interner);
  1.6764 +        }
  1.6765 +
  1.6766 +        Portability.delete(attributes);
  1.6767 +        if (other.attributes == null) {
  1.6768 +            attributes = null;
  1.6769 +        } else {
  1.6770 +            attributes = other.attributes.cloneAttributes(interner);
  1.6771 +        }
  1.6772 +    }
  1.6773 +
  1.6774 +    public void initializeWithoutStarting() throws SAXException {
  1.6775 +        confident = false;
  1.6776 +        strBuf = new char[64];
  1.6777 +        longStrBuf = new char[1024];
  1.6778 +        line = 1;
  1.6779 +        // [NOCPP[
  1.6780 +        html4 = false;
  1.6781 +        metaBoundaryPassed = false;
  1.6782 +        wantsComments = tokenHandler.wantsComments();
  1.6783 +        if (!newAttributesEachTime) {
  1.6784 +            attributes = new HtmlAttributes(mappingLangToXmlLang);
  1.6785 +        }
  1.6786 +        // ]NOCPP]
  1.6787 +        resetToDataState();
  1.6788 +    }
  1.6789 +
  1.6790 +    protected void errGarbageAfterLtSlash() throws SAXException {
  1.6791 +    }
  1.6792 +
  1.6793 +    protected void errLtSlashGt() throws SAXException {
  1.6794 +    }
  1.6795 +
  1.6796 +    protected void errWarnLtSlashInRcdata() throws SAXException {
  1.6797 +    }
  1.6798 +
  1.6799 +    protected void errHtml4LtSlashInRcdata(char folded) throws SAXException {
  1.6800 +    }
  1.6801 +
  1.6802 +    protected void errCharRefLacksSemicolon() throws SAXException {
  1.6803 +    }
  1.6804 +
  1.6805 +    protected void errNoDigitsInNCR() throws SAXException {
  1.6806 +    }
  1.6807 +
  1.6808 +    protected void errGtInSystemId() throws SAXException {
  1.6809 +    }
  1.6810 +
  1.6811 +    protected void errGtInPublicId() throws SAXException {
  1.6812 +    }
  1.6813 +
  1.6814 +    protected void errNamelessDoctype() throws SAXException {
  1.6815 +    }
  1.6816 +
  1.6817 +    protected void errConsecutiveHyphens() throws SAXException {
  1.6818 +    }
  1.6819 +
  1.6820 +    protected void errPrematureEndOfComment() throws SAXException {
  1.6821 +    }
  1.6822 +
  1.6823 +    protected void errBogusComment() throws SAXException {
  1.6824 +    }
  1.6825 +
  1.6826 +    protected void errUnquotedAttributeValOrNull(char c) throws SAXException {
  1.6827 +    }
  1.6828 +
  1.6829 +    protected void errSlashNotFollowedByGt() throws SAXException {
  1.6830 +    }
  1.6831 +
  1.6832 +    protected void errHtml4XmlVoidSyntax() throws SAXException {
  1.6833 +    }
  1.6834 +
  1.6835 +    protected void errNoSpaceBetweenAttributes() throws SAXException {
  1.6836 +    }
  1.6837 +
  1.6838 +    protected void errHtml4NonNameInUnquotedAttribute(char c)
  1.6839 +            throws SAXException {
  1.6840 +    }
  1.6841 +
  1.6842 +    protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
  1.6843 +            throws SAXException {
  1.6844 +    }
  1.6845 +
  1.6846 +    protected void errAttributeValueMissing() throws SAXException {
  1.6847 +    }
  1.6848 +
  1.6849 +    protected void errBadCharBeforeAttributeNameOrNull(char c)
  1.6850 +            throws SAXException {
  1.6851 +    }
  1.6852 +
  1.6853 +    protected void errEqualsSignBeforeAttributeName() throws SAXException {
  1.6854 +    }
  1.6855 +
  1.6856 +    protected void errBadCharAfterLt(char c) throws SAXException {
  1.6857 +    }
  1.6858 +
  1.6859 +    protected void errLtGt() throws SAXException {
  1.6860 +    }
  1.6861 +
  1.6862 +    protected void errProcessingInstruction() throws SAXException {
  1.6863 +    }
  1.6864 +
  1.6865 +    protected void errUnescapedAmpersandInterpretedAsCharacterReference()
  1.6866 +            throws SAXException {
  1.6867 +    }
  1.6868 +
  1.6869 +    protected void errNotSemicolonTerminated() throws SAXException {
  1.6870 +    }
  1.6871 +
  1.6872 +    protected void errNoNamedCharacterMatch() throws SAXException {
  1.6873 +    }
  1.6874 +
  1.6875 +    protected void errQuoteBeforeAttributeName(char c) throws SAXException {
  1.6876 +    }
  1.6877 +
  1.6878 +    protected void errQuoteOrLtInAttributeNameOrNull(char c)
  1.6879 +            throws SAXException {
  1.6880 +    }
  1.6881 +
  1.6882 +    protected void errExpectedPublicId() throws SAXException {
  1.6883 +    }
  1.6884 +
  1.6885 +    protected void errBogusDoctype() throws SAXException {
  1.6886 +    }
  1.6887 +
  1.6888 +    protected void maybeWarnPrivateUseAstral() throws SAXException {
  1.6889 +    }
  1.6890 +
  1.6891 +    protected void maybeWarnPrivateUse(char ch) throws SAXException {
  1.6892 +    }
  1.6893 +
  1.6894 +    protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
  1.6895 +            throws SAXException {
  1.6896 +    }
  1.6897 +
  1.6898 +    protected void maybeErrSlashInEndTag(boolean selfClosing)
  1.6899 +            throws SAXException {
  1.6900 +    }
  1.6901 +
  1.6902 +    protected char errNcrNonCharacter(char ch) throws SAXException {
  1.6903 +        return ch;
  1.6904 +    }
  1.6905 +
  1.6906 +    protected void errAstralNonCharacter(int ch) throws SAXException {
  1.6907 +    }
  1.6908 +
  1.6909 +    protected void errNcrSurrogate() throws SAXException {
  1.6910 +    }
  1.6911 +
  1.6912 +    protected char errNcrControlChar(char ch) throws SAXException {
  1.6913 +        return ch;
  1.6914 +    }
  1.6915 +
  1.6916 +    protected void errNcrCr() throws SAXException {
  1.6917 +    }
  1.6918 +
  1.6919 +    protected void errNcrInC1Range() throws SAXException {
  1.6920 +    }
  1.6921 +
  1.6922 +    protected void errEofInPublicId() throws SAXException {
  1.6923 +    }
  1.6924 +
  1.6925 +    protected void errEofInComment() throws SAXException {
  1.6926 +    }
  1.6927 +
  1.6928 +    protected void errEofInDoctype() throws SAXException {
  1.6929 +    }
  1.6930 +
  1.6931 +    protected void errEofInAttributeValue() throws SAXException {
  1.6932 +    }
  1.6933 +
  1.6934 +    protected void errEofInAttributeName() throws SAXException {
  1.6935 +    }
  1.6936 +
  1.6937 +    protected void errEofWithoutGt() throws SAXException {
  1.6938 +    }
  1.6939 +
  1.6940 +    protected void errEofInTagName() throws SAXException {
  1.6941 +    }
  1.6942 +
  1.6943 +    protected void errEofInEndTag() throws SAXException {
  1.6944 +    }
  1.6945 +
  1.6946 +    protected void errEofAfterLt() throws SAXException {
  1.6947 +    }
  1.6948 +
  1.6949 +    protected void errNcrOutOfRange() throws SAXException {
  1.6950 +    }
  1.6951 +
  1.6952 +    protected void errNcrUnassigned() throws SAXException {
  1.6953 +    }
  1.6954 +
  1.6955 +    protected void errDuplicateAttribute() throws SAXException {
  1.6956 +    }
  1.6957 +
  1.6958 +    protected void errEofInSystemId() throws SAXException {
  1.6959 +    }
  1.6960 +
  1.6961 +    protected void errExpectedSystemId() throws SAXException {
  1.6962 +    }
  1.6963 +
  1.6964 +    protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
  1.6965 +    }
  1.6966 +
  1.6967 +    protected void errHyphenHyphenBang() throws SAXException {
  1.6968 +    }
  1.6969 +
  1.6970 +    protected void errNcrControlChar() throws SAXException {
  1.6971 +    }
  1.6972 +
  1.6973 +    protected void errNcrZero() throws SAXException {
  1.6974 +    }
  1.6975 +
  1.6976 +    protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
  1.6977 +            throws SAXException {
  1.6978 +    }
  1.6979 +
  1.6980 +    protected void errNoSpaceBetweenPublicAndSystemIds() throws SAXException {
  1.6981 +    }
  1.6982 +
  1.6983 +    protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
  1.6984 +            throws SAXException {
  1.6985 +    }
  1.6986 +
  1.6987 +    protected void noteAttributeWithoutValue() throws SAXException {
  1.6988 +    }
  1.6989 +
  1.6990 +    protected void noteUnquotedAttributeValue() throws SAXException {
  1.6991 +    }
  1.6992 +
  1.6993 +    /**
  1.6994 +     * Sets the encodingDeclarationHandler.
  1.6995 +     * 
  1.6996 +     * @param encodingDeclarationHandler
  1.6997 +     *            the encodingDeclarationHandler to set
  1.6998 +     */
  1.6999 +    public void setEncodingDeclarationHandler(
  1.7000 +            EncodingDeclarationHandler encodingDeclarationHandler) {
  1.7001 +        this.encodingDeclarationHandler = encodingDeclarationHandler;
  1.7002 +    }
  1.7003 +    
  1.7004 +    void destructor() {
  1.7005 +        // The translator will write refcount tracing stuff here
  1.7006 +        Portability.delete(attributes);
  1.7007 +        attributes = null;
  1.7008 +    }
  1.7009 +    
  1.7010 +    // [NOCPP[
  1.7011 +    
  1.7012 +    /**
  1.7013 +     * Sets an offset to be added to the position reported to 
  1.7014 +     * <code>TransitionHandler</code>.
  1.7015 +     * 
  1.7016 +     * @param offset the offset
  1.7017 +     */
  1.7018 +    public void setTransitionBaseOffset(int offset) {
  1.7019 +        
  1.7020 +    }
  1.7021 +    
  1.7022 +    // ]NOCPP]
  1.7023 +
  1.7024 +}

mercurial