The Tor Browser: mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java@fc2d59ddac77

     1 /*

     2  * ====================================================================

     3  * Licensed to the Apache Software Foundation (ASF) under one

     4  * or more contributor license agreements.  See the NOTICE file

     5  * distributed with this work for additional information

     6  * regarding copyright ownership.  The ASF licenses this file

     7  * to you under the Apache License, Version 2.0 (the

     8  * "License"); you may not use this file except in compliance

     9  * with the License.  You may obtain a copy of the License at

    10  *

    11  *   http://www.apache.org/licenses/LICENSE-2.0

    12  *

    13  * Unless required by applicable law or agreed to in writing,

    14  * software distributed under the License is distributed on an

    15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

    16  * KIND, either express or implied.  See the License for the

    17  * specific language governing permissions and limitations

    18  * under the License.

    19  * ====================================================================

    20  *

    21  * This software consists of voluntary contributions made by many

    22  * individuals on behalf of the Apache Software Foundation.  For more

    23  * information on the Apache Software Foundation, please see

    24  * <http://www.apache.org/>.

    25  *

    26  */

    28 package ch.boye.httpclientandroidlib.message;

    30 import java.util.NoSuchElementException;

    32 import ch.boye.httpclientandroidlib.HeaderIterator;

    33 import ch.boye.httpclientandroidlib.ParseException;

    34 import ch.boye.httpclientandroidlib.TokenIterator;

    36 /**

    37  * Basic implementation of a {@link TokenIterator}.

    38  * This implementation parses <tt>#token</tt> sequences as

    39  * defined by RFC 2616, section 2.

    40  * It extends that definition somewhat beyond US-ASCII.

    41  *

    42  * @since 4.0

    43  */

    44 public class BasicTokenIterator implements TokenIterator {

    46     /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */

    47     // the order of the characters here is adjusted to put the

    48     // most likely candidates at the beginning of the collection

    49     public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";

    52     /** The iterator from which to obtain the next header. */

    53     protected final HeaderIterator headerIt;

    55     /**

    56      * The value of the current header.

    57      * This is the header value that includes {@link #currentToken}.

    58      * Undefined if the iteration is over.

    59      */

    60     protected String currentHeader;

    62     /**

    63      * The token to be returned by the next call to {@link #currentToken}.

    64      * <code>null</code> if the iteration is over.

    65      */

    66     protected String currentToken;

    68     /**

    69      * The position after {@link #currentToken} in {@link #currentHeader}.

    70      * Undefined if the iteration is over.

    71      */

    72     protected int searchPos;

    75     /**

    76      * Creates a new instance of {@link BasicTokenIterator}.

    77      *

    78      * @param headerIterator    the iterator for the headers to tokenize

    79      */

    80     public BasicTokenIterator(final HeaderIterator headerIterator) {

    81         if (headerIterator == null) {

    82             throw new IllegalArgumentException

    83                 ("Header iterator must not be null.");

    84         }

    86         this.headerIt = headerIterator;

    87         this.searchPos = findNext(-1);

    88     }

    91     // non-javadoc, see interface TokenIterator

    92     public boolean hasNext() {

    93         return (this.currentToken != null);

    94     }

    97     /**

    98      * Obtains the next token from this iteration.

    99      *

   100      * @return  the next token in this iteration

   101      *

   102      * @throws NoSuchElementException   if the iteration is already over

   103      * @throws ParseException   if an invalid header value is encountered

   104      */

   105     public String nextToken()

   106         throws NoSuchElementException, ParseException {

   108         if (this.currentToken == null) {

   109             throw new NoSuchElementException("Iteration already finished.");

   110         }

   112         final String result = this.currentToken;

   113         // updates currentToken, may trigger ParseException:

   114         this.searchPos = findNext(this.searchPos);

   116         return result;

   117     }

   120     /**

   121      * Returns the next token.

   122      * Same as {@link #nextToken}, but with generic return type.

   123      *

   124      * @return  the next token in this iteration

   125      *

   126      * @throws NoSuchElementException   if there are no more tokens

   127      * @throws ParseException   if an invalid header value is encountered

   128      */

   129     public final Object next()

   130         throws NoSuchElementException, ParseException {

   131         return nextToken();

   132     }

   135     /**

   136      * Removing tokens is not supported.

   137      *

   138      * @throws UnsupportedOperationException    always

   139      */

   140     public final void remove()

   141         throws UnsupportedOperationException {

   143         throw new UnsupportedOperationException

   144             ("Removing tokens is not supported.");

   145     }

   148     /**

   149      * Determines the next token.

   150      * If found, the token is stored in {@link #currentToken}.

   151      * The return value indicates the position after the token

   152      * in {@link #currentHeader}. If necessary, the next header

   153      * will be obtained from {@link #headerIt}.

   154      * If not found, {@link #currentToken} is set to <code>null</code>.

   155      *

   156      * @param from      the position in the current header at which to

   157      *                  start the search, -1 to search in the first header

   158      *

   159      * @return  the position after the found token in the current header, or

   160      *          negative if there was no next token

   161      *

   162      * @throws ParseException   if an invalid header value is encountered

   163      */

   164     protected int findNext(int from)

   165         throws ParseException {

   167         if (from < 0) {

   168             // called from the constructor, initialize the first header

   169             if (!this.headerIt.hasNext()) {

   170                 return -1;

   171             }

   172             this.currentHeader = this.headerIt.nextHeader().getValue();

   173             from = 0;

   174         } else {

   175             // called after a token, make sure there is a separator

   176             from = findTokenSeparator(from);

   177         }

   179         int start = findTokenStart(from);

   180         if (start < 0) {

   181             this.currentToken = null;

   182             return -1; // nothing found

   183         }

   185         int end = findTokenEnd(start);

   186         this.currentToken = createToken(this.currentHeader, start, end);

   187         return end;

   188     }

   191     /**

   192      * Creates a new token to be returned.

   193      * Called from {@link #findNext findNext} after the token is identified.

   194      * The default implementation simply calls

   195      * {@link java.lang.String#substring String.substring}.

   196      * <br/>

   197      * If header values are significantly longer than tokens, and some

   198      * tokens are permanently referenced by the application, there can

   199      * be problems with garbage collection. A substring will hold a

   200      * reference to the full characters of the original string and

   201      * therefore occupies more memory than might be expected.

   202      * To avoid this, override this method and create a new string

   203      * instead of a substring.

   204      *

   205      * @param value     the full header value from which to create a token

   206      * @param start     the index of the first token character

   207      * @param end       the index after the last token character

   208      *

   209      * @return  a string representing the token identified by the arguments

   210      */

   211     protected String createToken(String value, int start, int end) {

   212         return value.substring(start, end);

   213     }

   216     /**

   217      * Determines the starting position of the next token.

   218      * This method will iterate over headers if necessary.

   219      *

   220      * @param from      the position in the current header at which to

   221      *                  start the search

   222      *

   223      * @return  the position of the token start in the current header,

   224      *          negative if no token start could be found

   225      */

   226     protected int findTokenStart(int from) {

   227         if (from < 0) {

   228             throw new IllegalArgumentException

   229                 ("Search position must not be negative: " + from);

   230         }

   232         boolean found = false;

   233         while (!found && (this.currentHeader != null)) {

   235             final int to = this.currentHeader.length();

   236             while (!found && (from < to)) {

   238                 final char ch = this.currentHeader.charAt(from);

   239                 if (isTokenSeparator(ch) || isWhitespace(ch)) {

   240                     // whitspace and token separators are skipped

   241                     from++;

   242                 } else if (isTokenChar(this.currentHeader.charAt(from))) {

   243                     // found the start of a token

   244                     found = true;

   245                 } else {

   246                     throw new ParseException

   247                         ("Invalid character before token (pos " + from +

   248                          "): " + this.currentHeader);

   249                 }

   250             }

   251             if (!found) {

   252                 if (this.headerIt.hasNext()) {

   253                     this.currentHeader = this.headerIt.nextHeader().getValue();

   254                     from = 0;

   255                 } else {

   256                     this.currentHeader = null;

   257                 }

   258             }

   259         } // while headers

   261         return found ? from : -1;

   262     }

   265     /**

   266      * Determines the position of the next token separator.

   267      * Because of multi-header joining rules, the end of a

   268      * header value is a token separator. This method does

   269      * therefore not need to iterate over headers.

   270      *

   271      * @param from      the position in the current header at which to

   272      *                  start the search

   273      *

   274      * @return  the position of a token separator in the current header,

   275      *          or at the end

   276      *

   277      * @throws ParseException

   278      *         if a new token is found before a token separator.

   279      *         RFC 2616, section 2.1 explicitly requires a comma between

   280      *         tokens for <tt>#</tt>.

   281      */

   282     protected int findTokenSeparator(int from) {

   283         if (from < 0) {

   284             throw new IllegalArgumentException

   285                 ("Search position must not be negative: " + from);

   286         }

   288         boolean found = false;

   289         final int to = this.currentHeader.length();

   290         while (!found && (from < to)) {

   291             final char ch = this.currentHeader.charAt(from);

   292             if (isTokenSeparator(ch)) {

   293                 found = true;

   294             } else if (isWhitespace(ch)) {

   295                 from++;

   296             } else if (isTokenChar(ch)) {

   297                 throw new ParseException

   298                     ("Tokens without separator (pos " + from +

   299                      "): " + this.currentHeader);

   300             } else {

   301                 throw new ParseException

   302                     ("Invalid character after token (pos " + from +

   303                      "): " + this.currentHeader);

   304             }

   305         }

   307         return from;

   308     }

   311     /**

   312      * Determines the ending position of the current token.

   313      * This method will not leave the current header value,

   314      * since the end of the header value is a token boundary.

   315      *

   316      * @param from      the position of the first character of the token

   317      *

   318      * @return  the position after the last character of the token.

   319      *          The behavior is undefined if <code>from</code> does not

   320      *          point to a token character in the current header value.

   321      */

   322     protected int findTokenEnd(int from) {

   323         if (from < 0) {

   324             throw new IllegalArgumentException

   325                 ("Token start position must not be negative: " + from);

   326         }

   328         final int to = this.currentHeader.length();

   329         int end = from+1;

   330         while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {

   331             end++;

   332         }

   334         return end;

   335     }

   338     /**

   339      * Checks whether a character is a token separator.

   340      * RFC 2616, section 2.1 defines comma as the separator for

   341      * <tt>#token</tt> sequences. The end of a header value will

   342      * also separate tokens, but that is not a character check.

   343      *

   344      * @param ch        the character to check

   345      *

   346      * @return  <code>true</code> if the character is a token separator,

   347      *          <code>false</code> otherwise

   348      */

   349     protected boolean isTokenSeparator(char ch) {

   350         return (ch == ',');

   351     }

   354     /**

   355      * Checks whether a character is a whitespace character.

   356      * RFC 2616, section 2.2 defines space and horizontal tab as whitespace.

   357      * The optional preceeding line break is irrelevant, since header

   358      * continuation is handled transparently when parsing messages.

   359      *

   360      * @param ch        the character to check

   361      *

   362      * @return  <code>true</code> if the character is whitespace,

   363      *          <code>false</code> otherwise

   364      */

   365     protected boolean isWhitespace(char ch) {

   367         // we do not use Character.isWhitspace(ch) here, since that allows

   368         // many control characters which are not whitespace as per RFC 2616

   369         return ((ch == '\t') || Character.isSpaceChar(ch));

   370     }

   373     /**

   374      * Checks whether a character is a valid token character.

   375      * Whitespace, control characters, and HTTP separators are not

   376      * valid token characters. The HTTP specification (RFC 2616, section 2.2)

   377      * defines tokens only for the US-ASCII character set, this

   378      * method extends the definition to other character sets.

   379      *

   380      * @param ch        the character to check

   381      *

   382      * @return  <code>true</code> if the character is a valid token start,

   383      *          <code>false</code> otherwise

   384      */

   385     protected boolean isTokenChar(char ch) {

   387         // common sense extension of ALPHA + DIGIT

   388         if (Character.isLetterOrDigit(ch))

   389             return true;

   391         // common sense extension of CTL

   392         if (Character.isISOControl(ch))

   393             return false;

   395         // no common sense extension for this

   396         if (isHttpSeparator(ch))

   397             return false;

   399         // RFC 2616, section 2.2 defines a token character as

   400         // "any CHAR except CTLs or separators". The controls

   401         // and separators are included in the checks above.

   402         // This will yield unexpected results for Unicode format characters.

   403         // If that is a problem, overwrite isHttpSeparator(char) to filter

   404         // out the false positives.

   405         return true;

   406     }

   409     /**

   410      * Checks whether a character is an HTTP separator.

   411      * The implementation in this class checks only for the HTTP separators

   412      * defined in RFC 2616, section 2.2. If you need to detect other

   413      * separators beyond the US-ASCII character set, override this method.

   414      *

   415      * @param ch        the character to check

   416      *

   417      * @return  <code>true</code> if the character is an HTTP separator

   418      */

   419     protected boolean isHttpSeparator(char ch) {

   420         return (HTTP_SEPARATORS.indexOf(ch) >= 0);

   421     }

   424 } // class BasicTokenIterator

The Tor Browser / file revision

mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java@fc2d59ddac77

mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java