parser/html/javasrc/MetaScanner.java

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2  * Copyright (c) 2007 Henri Sivonen
     3  * Copyright (c) 2008-2010 Mozilla Foundation
     4  *
     5  * Permission is hereby granted, free of charge, to any person obtaining a 
     6  * copy of this software and associated documentation files (the "Software"), 
     7  * to deal in the Software without restriction, including without limitation 
     8  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
     9  * and/or sell copies of the Software, and to permit persons to whom the 
    10  * Software is furnished to do so, subject to the following conditions:
    11  *
    12  * The above copyright notice and this permission notice shall be included in 
    13  * all copies or substantial portions of the Software.
    14  *
    15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
    16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
    17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
    18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
    19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
    20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
    21  * DEALINGS IN THE SOFTWARE.
    22  */
    24 package nu.validator.htmlparser.impl;
    26 import java.io.IOException;
    28 import nu.validator.htmlparser.annotation.Auto;
    29 import nu.validator.htmlparser.annotation.Inline;
    30 import nu.validator.htmlparser.common.ByteReadable;
    32 import org.xml.sax.SAXException;
    34 public abstract class MetaScanner {
    36     /**
    37      * Constant for "charset" without its leading 'c'; that letter is consumed before index-based matching starts.
    38      */
    39     private static final char[] CHARSET = { 'h', 'a', 'r', 's', 'e', 't' };
    41     /**
    42      * Constant for "content" without its leading 'c'; that letter is consumed before index-based matching starts.
    43      */
    44     private static final char[] CONTENT = { 'o', 'n', 't', 'e', 'n', 't' };
    46     /**
    47      * Constant for "http-equiv" without its leading 'h'; that letter is consumed before index-based matching starts.
    48      */
    49     private static final char[] HTTP_EQUIV = { 't', 't', 'p', '-', 'e', 'q',
    50             'u', 'i', 'v' };
    52     /**
    53      * Constant for "content-type", stored in full.
    54      */
    55     private static final char[] CONTENT_TYPE = { 'c', 'o', 'n', 't', 'e', 'n',
    56             't', '-', 't', 'y', 'p', 'e' };
    58     private static final int NO = 0; // metaState: the tag name is not (or no longer) a prefix of "meta"
    60     private static final int M = 1; // metaState: "m" matched (either case)
    62     private static final int E = 2; // metaState: "me" matched
    64     private static final int T = 3; // metaState: "met" matched
    66     private static final int A = 4; // metaState: "meta" matched; attribute names are only compared in this state
    68     private static final int DATA = 0; // first of the tokenizer states switched on in stateLoop()
    70     private static final int TAG_OPEN = 1;
    72     private static final int SCAN_UNTIL_GT = 2;
    74     private static final int TAG_NAME = 3;
    76     private static final int BEFORE_ATTRIBUTE_NAME = 4;
    78     private static final int ATTRIBUTE_NAME = 5;
    80     private static final int AFTER_ATTRIBUTE_NAME = 6;
    82     private static final int BEFORE_ATTRIBUTE_VALUE = 7;
    84     private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8;
    86     private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 9;
    88     private static final int ATTRIBUTE_VALUE_UNQUOTED = 10;
    90     private static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 11; // the next constant is 13; none of these states uses 12
    92     private static final int MARKUP_DECLARATION_OPEN = 13;
    94     private static final int MARKUP_DECLARATION_HYPHEN = 14;
    96     private static final int COMMENT_START = 15;
    98     private static final int COMMENT_START_DASH = 16;
   100     private static final int COMMENT = 17;
   102     private static final int COMMENT_END_DASH = 18;
   104     private static final int COMMENT_END = 19;
   106     private static final int SELF_CLOSING_START_TAG = 20; // last of the tokenizer states
   108     private static final int HTTP_EQUIV_NOT_SEEN = 0; // httpEquivState value assigned by the constructor
   110     private static final int HTTP_EQUIV_CONTENT_TYPE = 1; // presumably: http-equiv="content-type" was seen -- TODO confirm
   112     private static final int HTTP_EQUIV_OTHER = 2; // presumably: http-equiv had some other value -- TODO confirm
   114     /**
   115      * The data source; read() pulls its bytes from here.
   116      */
   117     protected ByteReadable readable;
   119     /**
   120      * The state of the state machine that recognizes the tag name "meta" (one of NO, M, E, T, A).
   121      */
   122     private int metaState = NO;
   124     /**
   125      * The current position in recognizing the attribute name "content"; Integer.MAX_VALUE means no match is in progress.
   126      */
   127     private int contentIndex = Integer.MAX_VALUE;
   129     /**
   130      * The current position in recognizing the attribute name "charset"; Integer.MAX_VALUE means no match is in progress.
   131      */
   132     private int charsetIndex = Integer.MAX_VALUE;
   134     /**
   135      * The current position in recognizing the attribute name "http-equiv"; Integer.MAX_VALUE means no match is in progress.
   136      */
   137     private int httpEquivIndex = Integer.MAX_VALUE;
   139     /**
   140      * The current position in recognizing the attribute value "content-type"; reset to 0 when '=' follows an attribute name.
   141      */
   142     private int contentTypeIndex = Integer.MAX_VALUE;
   144     /**
   145      * The tokenizer state.
   146      */
   147     protected int stateSave = DATA;
   149     /**
   150      * The currently filled length of strBuf; reset to 0 when '=' follows an attribute name.
   151      */
   152     private int strBufLen;
   154     /**
   155      * Accumulation buffer for attribute values; the constructor allocates 36 chars.
   156      */
   157     private @Auto char[] strBuf;
   159     private String content; // released in destructor(); presumably the captured "content" attribute value -- TODO confirm
   161     private String charset; // released in destructor(); presumably the captured "charset" attribute value -- TODO confirm
   163     private int httpEquivState; // one of the HTTP_EQUIV_* constants; starts as HTTP_EQUIV_NOT_SEEN
   165     public MetaScanner() {
   166         // No input is attached yet and no tag has been recognized.
   167         readable = null;
   168         stateSave = DATA;
   169         metaState = NO;
   170         httpEquivState = HTTP_EQUIV_NOT_SEEN;
   171         // MAX_VALUE keeps every matcher index beyond the end of its target array.
   172         contentIndex = charsetIndex = Integer.MAX_VALUE;
   173         httpEquivIndex = contentTypeIndex = Integer.MAX_VALUE;
   174         // Empty accumulation buffer; no attribute values captured so far.
   175         strBuf = new char[36];
   176         strBufLen = 0;
   177         content = charset = null;
   178     }
   180     @SuppressWarnings("unused") private void destructor() { // no caller in the visible Java; presumably a hook for generated C++ -- TODO confirm
   181         Portability.releaseString(content); // hand the content string back to Portability
   182         Portability.releaseString(charset); // likewise for the charset string
   183     }
   185     // [NOCPP[
   187     /**
   188      * Reads a byte from the data source.
   189      * 
   190      * -1 means end.
   191      * @return
   192      * @throws IOException
   193      */
   194     protected int read() throws IOException {
   195         return readable.readByte();
   196     }
   198     // ]NOCPP]
   200     // WARNING: When editing this, make sure the bytecode length shown by javap
   201     // stays under 8000 bytes!
   202     /**
   203      * Runs the meta scanning algorithm.
   204      */
   205     protected final void stateLoop(int state)
   206             throws SAXException, IOException {
   207         int c = -1;
   208         boolean reconsume = false;
   209         stateloop: for (;;) {
   210             switch (state) {
   211                 case DATA:
   212                     dataloop: for (;;) {
   213                         if (reconsume) {
   214                             reconsume = false;
   215                         } else {
   216                             c = read();
   217                         }
   218                         switch (c) {
   219                             case -1:
   220                                 break stateloop;
   221                             case '<':
   222                                 state = MetaScanner.TAG_OPEN;
   223                                 break dataloop; // FALL THROUGH continue
   224                             // stateloop;
   225                             default:
   226                                 continue;
   227                         }
   228                     }
   229                     // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
   230                 case TAG_OPEN:
   231                     tagopenloop: for (;;) {
   232                         c = read();
   233                         switch (c) {
   234                             case -1:
   235                                 break stateloop;
   236                             case 'm':
   237                             case 'M':
   238                                 metaState = M;
   239                                 state = MetaScanner.TAG_NAME;
   240                                 break tagopenloop;
   241                                 // continue stateloop;                                
   242                             case '!':
   243                                 state = MetaScanner.MARKUP_DECLARATION_OPEN;
   244                                 continue stateloop;
   245                             case '?':
   246                             case '/':
   247                                 state = MetaScanner.SCAN_UNTIL_GT;
   248                                 continue stateloop;
   249                             case '>':
   250                                 state = MetaScanner.DATA;
   251                                 continue stateloop;
   252                             default:
   253                                 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
   254                                     metaState = NO;
   255                                     state = MetaScanner.TAG_NAME;
   256                                     break tagopenloop;
   257                                     // continue stateloop;
   258                                 }
   259                                 state = MetaScanner.DATA;
   260                                 reconsume = true;
   261                                 continue stateloop;
   262                         }
   263                     }
   264                     // FALL THROUGH DON'T REORDER
   265                 case TAG_NAME:
   266                     tagnameloop: for (;;) {
   267                         c = read();
   268                         switch (c) {
   269                             case -1:
   270                                 break stateloop;
   271                             case ' ':
   272                             case '\t':
   273                             case '\n':
   274                             case '\u000C':
   275                                 state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
   276                                 break tagnameloop;
   277                             // continue stateloop;
   278                             case '/':
   279                                 state = MetaScanner.SELF_CLOSING_START_TAG;
   280                                 continue stateloop;
   281                             case '>':
   282                                 state = MetaScanner.DATA;
   283                                 continue stateloop;
   284                             case 'e':
   285                             case 'E':
   286                                 if (metaState == M) {
   287                                     metaState = E;
   288                                 } else {
   289                                     metaState = NO;
   290                                 }
   291                                 continue;
   292                             case 't':
   293                             case 'T':
   294                                 if (metaState == E) {
   295                                     metaState = T;
   296                                 } else {
   297                                     metaState = NO;
   298                                 }
   299                                 continue;
   300                             case 'a':
   301                             case 'A':
   302                                 if (metaState == T) {
   303                                     metaState = A;
   304                                 } else {
   305                                     metaState = NO;
   306                                 }
   307                                 continue;
   308                             default:
   309                                 metaState = NO;
   310                                 continue;
   311                         }
   312                     }
   313                     // FALLTHRU DON'T REORDER
   314                 case BEFORE_ATTRIBUTE_NAME:
   315                     beforeattributenameloop: for (;;) {
   316                         if (reconsume) {
   317                             reconsume = false;
   318                         } else {
   319                             c = read();
   320                         }
   321                         /*
   322                          * Consume the next input character:
   323                          */
   324                         switch (c) {
   325                             case -1:
   326                                 break stateloop;
   327                             case ' ':
   328                             case '\t':
   329                             case '\n':
   330                             case '\u000C':
   331                                 continue;
   332                             case '/':
   333                                 state = MetaScanner.SELF_CLOSING_START_TAG;
   334                                 continue stateloop;
   335                             case '>':
   336                                 if (handleTag()) {
   337                                     break stateloop;
   338                                 }
   339                                 state = DATA;
   340                                 continue stateloop;
   341                             case 'c':
   342                             case 'C':
   343                                 contentIndex = 0;
   344                                 charsetIndex = 0;
   345                                 httpEquivIndex = Integer.MAX_VALUE;
   346                                 contentTypeIndex = Integer.MAX_VALUE;
   347                                 state = MetaScanner.ATTRIBUTE_NAME;
   348                                 break beforeattributenameloop;                                
   349                             case 'h':
   350                             case 'H':
   351                                 contentIndex = Integer.MAX_VALUE;
   352                                 charsetIndex = Integer.MAX_VALUE;
   353                                 httpEquivIndex = 0;
   354                                 contentTypeIndex = Integer.MAX_VALUE;
   355                                 state = MetaScanner.ATTRIBUTE_NAME;
   356                                 break beforeattributenameloop;                                
   357                             default:
   358                                 contentIndex = Integer.MAX_VALUE;
   359                                 charsetIndex = Integer.MAX_VALUE;
   360                                 httpEquivIndex = Integer.MAX_VALUE;
   361                                 contentTypeIndex = Integer.MAX_VALUE;
   362                                 state = MetaScanner.ATTRIBUTE_NAME;
   363                                 break beforeattributenameloop;
   364                             // continue stateloop;
   365                         }
   366                     }
   367                     // FALLTHRU DON'T REORDER
   368                 case ATTRIBUTE_NAME:
   369                     attributenameloop: for (;;) {
   370                         c = read();
   371                         switch (c) {
   372                             case -1:
   373                                 break stateloop;
   374                             case ' ':
   375                             case '\t':
   376                             case '\n':
   377                             case '\u000C':
   378                                 state = MetaScanner.AFTER_ATTRIBUTE_NAME;
   379                                 continue stateloop;
   380                             case '/':
   381                                 state = MetaScanner.SELF_CLOSING_START_TAG;
   382                                 continue stateloop;
   383                             case '=':
   384                                 strBufLen = 0;
   385                                 contentTypeIndex = 0;
   386                                 state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
   387                                 break attributenameloop;
   388                             // continue stateloop;
   389                             case '>':
   390                                 if (handleTag()) {
   391                                     break stateloop;
   392                                 }
   393                                 state = MetaScanner.DATA;
   394                                 continue stateloop;
   395                             default:
   396                                 if (metaState == A) {
   397                                     if (c >= 'A' && c <= 'Z') {
   398                                         c += 0x20;
   399                                     }
   400                                     if (contentIndex < CONTENT.length && c == CONTENT[contentIndex]) {
   401                                         ++contentIndex;
   402                                     } else {
   403                                         contentIndex = Integer.MAX_VALUE;
   404                                     }
   405                                     if (charsetIndex < CHARSET.length && c == CHARSET[charsetIndex]) {
   406                                         ++charsetIndex;
   407                                     } else {
   408                                         charsetIndex = Integer.MAX_VALUE;
   409                                     }
   410                                     if (httpEquivIndex < HTTP_EQUIV.length && c == HTTP_EQUIV[httpEquivIndex]) {
   411                                         ++httpEquivIndex;
   412                                     } else {
   413                                         httpEquivIndex = Integer.MAX_VALUE;
   414                                     }                                    
   415                                 }
   416                                 continue;
   417                         }
   418                     }
   419                     // FALLTHRU DON'T REORDER
   420                 case BEFORE_ATTRIBUTE_VALUE:
   421                     beforeattributevalueloop: for (;;) {
   422                         c = read();
   423                         switch (c) {
   424                             case -1:
   425                                 break stateloop;
   426                             case ' ':
   427                             case '\t':
   428                             case '\n':
   429                             case '\u000C':
   430                                 continue;
   431                             case '"':
   432                                 state = MetaScanner.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
   433                                 break beforeattributevalueloop;
   434                             // continue stateloop;
   435                             case '\'':
   436                                 state = MetaScanner.ATTRIBUTE_VALUE_SINGLE_QUOTED;
   437                                 continue stateloop;
   438                             case '>':
   439                                 if (handleTag()) {
   440                                     break stateloop;
   441                                 }
   442                                 state = MetaScanner.DATA;
   443                                 continue stateloop;
   444                             default:
   445                                 handleCharInAttributeValue(c);
   446                                 state = MetaScanner.ATTRIBUTE_VALUE_UNQUOTED;
   447                                 continue stateloop;
   448                         }
   449                     }
   450                     // FALLTHRU DON'T REORDER
   451                 case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
   452                     attributevaluedoublequotedloop: for (;;) {
   453                         if (reconsume) {
   454                             reconsume = false;
   455                         } else {
   456                             c = read();
   457                         }
   458                         switch (c) {
   459                             case -1:
   460                                 break stateloop;
   461                             case '"':
   462                                 handleAttributeValue();
   463                                 state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
   464                                 break attributevaluedoublequotedloop;
   465                             // continue stateloop;
   466                             default:
   467                                 handleCharInAttributeValue(c);
   468                                 continue;
   469                         }
   470                     }
   471                     // FALLTHRU DON'T REORDER
   472                 case AFTER_ATTRIBUTE_VALUE_QUOTED:
   473                     afterattributevaluequotedloop: for (;;) {
   474                         c = read();
   475                         switch (c) {
   476                             case -1:
   477                                 break stateloop;
   478                             case ' ':
   479                             case '\t':
   480                             case '\n':
   481                             case '\u000C':
   482                                 state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
   483                                 continue stateloop;
   484                             case '/':
   485                                 state = MetaScanner.SELF_CLOSING_START_TAG;
   486                                 break afterattributevaluequotedloop;
   487                             // continue stateloop;
   488                             case '>':
   489                                 if (handleTag()) {
   490                                     break stateloop;
   491                                 }
   492                                 state = MetaScanner.DATA;
   493                                 continue stateloop;
   494                             default:
   495                                 state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
   496                                 reconsume = true;
   497                                 continue stateloop;
   498                         }
   499                     }
   500                     // FALLTHRU DON'T REORDER
   501                 case SELF_CLOSING_START_TAG:
   502                     c = read();
   503                     switch (c) {
   504                         case -1:
   505                             break stateloop;
   506                         case '>':
   507                             if (handleTag()) {
   508                                 break stateloop;
   509                             }
   510                             state = MetaScanner.DATA;
   511                             continue stateloop;
   512                         default:
   513                             state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
   514                             reconsume = true;
   515                             continue stateloop;
   516                     }
   517                     // XXX reorder point
   518                 case ATTRIBUTE_VALUE_UNQUOTED:
   519                     for (;;) {
   520                         if (reconsume) {
   521                             reconsume = false;
   522                         } else {
   523                             c = read();
   524                         }
   525                         switch (c) {
   526                             case -1:
   527                                 break stateloop;
   528                             case ' ':
   529                             case '\t':
   530                             case '\n':
   532                             case '\u000C':
   533                                 handleAttributeValue();
   534                                 state = MetaScanner.BEFORE_ATTRIBUTE_NAME;
   535                                 continue stateloop;
   536                             case '>':
   537                                 handleAttributeValue();
   538                                 if (handleTag()) {
   539                                     break stateloop;
   540                                 }
   541                                 state = MetaScanner.DATA;
   542                                 continue stateloop;
   543                             default:
   544                                 handleCharInAttributeValue(c);
   545                                 continue;
   546                         }
   547                     }
   548                     // XXX reorder point
   549                 case AFTER_ATTRIBUTE_NAME:
   550                     for (;;) {
   551                         c = read();
   552                         switch (c) {
   553                             case -1:
   554                                 break stateloop;
   555                             case ' ':
   556                             case '\t':
   557                             case '\n':
   558                             case '\u000C':
   559                                 continue;
   560                             case '/':
   561                                 handleAttributeValue();
   562                                 state = MetaScanner.SELF_CLOSING_START_TAG;
   563                                 continue stateloop;
   564                             case '=':
   565                                 strBufLen = 0;
   566                                 contentTypeIndex = 0;
   567                                 state = MetaScanner.BEFORE_ATTRIBUTE_VALUE;
   568                                 continue stateloop;
   569                             case '>':
   570                                 handleAttributeValue();
   571                                 if (handleTag()) {
   572                                     break stateloop;
   573                                 }
   574                                 state = MetaScanner.DATA;
   575                                 continue stateloop;
   576                             case 'c':
   577                             case 'C':
   578                                 contentIndex = 0;
   579                                 charsetIndex = 0;
   580                                 state = MetaScanner.ATTRIBUTE_NAME;
   581                                 continue stateloop;
   582                             default:
   583                                 contentIndex = Integer.MAX_VALUE;
   584                                 charsetIndex = Integer.MAX_VALUE;
   585                                 state = MetaScanner.ATTRIBUTE_NAME;
   586                                 continue stateloop;
   587                         }
   588                     }
   589                     // XXX reorder point
   590                 case MARKUP_DECLARATION_OPEN:
   591                     markupdeclarationopenloop: for (;;) {
   592                         c = read();
   593                         switch (c) {
   594                             case -1:
   595                                 break stateloop;
   596                             case '-':
   597                                 state = MetaScanner.MARKUP_DECLARATION_HYPHEN;
   598                                 break markupdeclarationopenloop;
   599                             // continue stateloop;
   600                             default:
   601                                 state = MetaScanner.SCAN_UNTIL_GT;
   602                                 reconsume = true;
   603                                 continue stateloop;
   604                         }
   605                     }
   606                     // FALLTHRU DON'T REORDER
   607                 case MARKUP_DECLARATION_HYPHEN:
   608                     markupdeclarationhyphenloop: for (;;) {
   609                         c = read();
   610                         switch (c) {
   611                             case -1:
   612                                 break stateloop;
   613                             case '-':
   614                                 state = MetaScanner.COMMENT_START;
   615                                 break markupdeclarationhyphenloop;
   616                             // continue stateloop;
   617                             default:
   618                                 state = MetaScanner.SCAN_UNTIL_GT;
   619                                 reconsume = true;
   620                                 continue stateloop;
   621                         }
   622                     }
   623                     // FALLTHRU DON'T REORDER
   624                 case COMMENT_START:
   625                     commentstartloop: for (;;) {
   626                         c = read();
   627                         switch (c) {
   628                             case -1:
   629                                 break stateloop;
   630                             case '-':
   631                                 state = MetaScanner.COMMENT_START_DASH;
   632                                 continue stateloop;
   633                             case '>':
   634                                 state = MetaScanner.DATA;
   635                                 continue stateloop;
   636                             default:
   637                                 state = MetaScanner.COMMENT;
   638                                 break commentstartloop;
   639                             // continue stateloop;
   640                         }
   641                     }
   642                     // FALLTHRU DON'T REORDER
   643                 case COMMENT:
   644                     commentloop: for (;;) {
   645                         c = read();
   646                         switch (c) {
   647                             case -1:
   648                                 break stateloop;
   649                             case '-':
   650                                 state = MetaScanner.COMMENT_END_DASH;
   651                                 break commentloop;
   652                             // continue stateloop;
   653                             default:
   654                                 continue;
   655                         }
   656                     }
   657                     // FALLTHRU DON'T REORDER
   658                 case COMMENT_END_DASH:
   659                     commentenddashloop: for (;;) {
   660                         c = read();
   661                         switch (c) {
   662                             case -1:
   663                                 break stateloop;
   664                             case '-':
   665                                 state = MetaScanner.COMMENT_END;
   666                                 break commentenddashloop;
   667                             // continue stateloop;
   668                             default:
   669                                 state = MetaScanner.COMMENT;
   670                                 continue stateloop;
   671                         }
   672                     }
   673                     // FALLTHRU DON'T REORDER
   674                 case COMMENT_END:
   675                     for (;;) {
   676                         c = read();
   677                         switch (c) {
   678                             case -1:
   679                                 break stateloop;
   680                             case '>':
   681                                 state = MetaScanner.DATA;
   682                                 continue stateloop;
   683                             case '-':
   684                                 continue;
   685                             default:
   686                                 state = MetaScanner.COMMENT;
   687                                 continue stateloop;
   688                         }
   689                     }
   690                     // XXX reorder point
   691                 case COMMENT_START_DASH:
   692                     c = read();
   693                     switch (c) {
   694                         case -1:
   695                             break stateloop;
   696                         case '-':
   697                             state = MetaScanner.COMMENT_END;
   698                             continue stateloop;
   699                         case '>':
   700                             state = MetaScanner.DATA;
   701                             continue stateloop;
   702                         default:
   703                             state = MetaScanner.COMMENT;
   704                             continue stateloop;
   705                     }
   706                     // XXX reorder point
   707                 case ATTRIBUTE_VALUE_SINGLE_QUOTED:
   708                     for (;;) {
   709                         if (reconsume) {
   710                             reconsume = false;
   711                         } else {
   712                             c = read();
   713                         }
   714                         switch (c) {
   715                             case -1:
   716                                 break stateloop;
   717                             case '\'':
   718                                 handleAttributeValue();
   719                                 state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED;
   720                                 continue stateloop;
   721                             default:
   722                                 handleCharInAttributeValue(c);
   723                                 continue;
   724                         }
   725                     }
   726                     // XXX reorder point
   727                 case SCAN_UNTIL_GT:
   728                     for (;;) {
   729                         if (reconsume) {
   730                             reconsume = false;
   731                         } else {
   732                             c = read();
   733                         }
   734                         switch (c) {
   735                             case -1:
   736                                 break stateloop;
   737                             case '>':
   738                                 state = MetaScanner.DATA;
   739                                 continue stateloop;
   740                             default:
   741                                 continue;
   742                         }
   743                     }
   744             }
   745         }
   746         stateSave  = state;
   747     }
   749     private void handleCharInAttributeValue(int c) {
   750         if (metaState == A) {
   751             if (contentIndex == CONTENT.length || charsetIndex == CHARSET.length) {
   752                 addToBuffer(c);
   753             } else if (httpEquivIndex == HTTP_EQUIV.length) {
   754                 if (contentTypeIndex < CONTENT_TYPE.length && toAsciiLowerCase(c) == CONTENT_TYPE[contentTypeIndex]) {
   755                     ++contentTypeIndex;
   756                 } else {
   757                     contentTypeIndex = Integer.MAX_VALUE;
   758                 }
   759             }
   760         }
   761     }
   763     @Inline private int toAsciiLowerCase(int c) {
   764         if (c >= 'A' && c <= 'Z') {
   765             return c + 0x20;
   766         }
   767         return c;
   768     }
   770     /**
   771      * Adds a character to the accumulation buffer.
   772      * @param c the character to add
   773      */
   774     private void addToBuffer(int c) {
   775         if (strBufLen == strBuf.length) {
   776             char[] newBuf = new char[strBuf.length + (strBuf.length << 1)];
   777             System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length);
   778             strBuf = newBuf;
   779         }
   780         strBuf[strBufLen++] = (char)c;
   781     }
   783     /**
   784      * Attempts to extract a charset name from the accumulation buffer.
   785      * @return <code>true</code> if successful
   786      * @throws SAXException
   787      */
   788     private void handleAttributeValue() throws SAXException {
   789         if (metaState != A) {
   790             return;
   791         }
   792         if (contentIndex == CONTENT.length && content == null) {
   793             content = Portability.newStringFromBuffer(strBuf, 0, strBufLen);
   794             return;
   795         }
   796         if (charsetIndex == CHARSET.length && charset == null) {
   797             charset = Portability.newStringFromBuffer(strBuf, 0, strBufLen);            
   798             return;
   799         }
   800         if (httpEquivIndex == HTTP_EQUIV.length
   801                 && httpEquivState == HTTP_EQUIV_NOT_SEEN) {
   802             httpEquivState = (contentTypeIndex == CONTENT_TYPE.length) ? HTTP_EQUIV_CONTENT_TYPE
   803                     : HTTP_EQUIV_OTHER;
   804             return;
   805         }
   806     }
   808     private boolean handleTag() throws SAXException {
   809         boolean stop = handleTagInner();
   810         Portability.releaseString(content);
   811         content = null;
   812         Portability.releaseString(charset);
   813         charset = null;
   814         httpEquivState = HTTP_EQUIV_NOT_SEEN;
   815         return stop;
   816     }
   818     private boolean handleTagInner() throws SAXException {
   819         if (charset != null && tryCharset(charset)) {
   820                 return true;
   821         }
   822         if (content != null && httpEquivState == HTTP_EQUIV_CONTENT_TYPE) {
   823             String extract = TreeBuilder.extractCharsetFromContent(content);
   824             if (extract == null) {
   825                 return false;
   826             }
   827             boolean success = tryCharset(extract);
   828             Portability.releaseString(extract);
   829             return success;
   830         }
   831         return false;
   832     }
   834     /**
   835      * Tries to switch to an encoding. Implemented by concrete subclasses.
   836      * 
   837      * @param encoding candidate name: a charset attribute value, or one extracted from a content attribute value
   838      * @return <code>true</code> if successful; handleTag() passes this result on as its "stop" flag
   839      * @throws SAXException at the implementation's discretion (conditions are not defined in this class)
   840      */
   841     protected abstract boolean tryCharset(String encoding) throws SAXException;
   843 }

mercurial