mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2  * ====================================================================
     3  * Licensed to the Apache Software Foundation (ASF) under one
     4  * or more contributor license agreements.  See the NOTICE file
     5  * distributed with this work for additional information
     6  * regarding copyright ownership.  The ASF licenses this file
     7  * to you under the Apache License, Version 2.0 (the
     8  * "License"); you may not use this file except in compliance
     9  * with the License.  You may obtain a copy of the License at
    10  *
    11  *   http://www.apache.org/licenses/LICENSE-2.0
    12  *
    13  * Unless required by applicable law or agreed to in writing,
    14  * software distributed under the License is distributed on an
    15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    16  * KIND, either express or implied.  See the License for the
    17  * specific language governing permissions and limitations
    18  * under the License.
    19  * ====================================================================
    20  *
    21  * This software consists of voluntary contributions made by many
    22  * individuals on behalf of the Apache Software Foundation.  For more
    23  * information on the Apache Software Foundation, please see
    24  * <http://www.apache.org/>.
    25  *
    26  */
    28 package ch.boye.httpclientandroidlib.message;
    30 import java.util.NoSuchElementException;
    32 import ch.boye.httpclientandroidlib.HeaderIterator;
    33 import ch.boye.httpclientandroidlib.ParseException;
    34 import ch.boye.httpclientandroidlib.TokenIterator;
    36 /**
    37  * Basic implementation of a {@link TokenIterator}.
    38  * This implementation parses <tt>#token<tt> sequences as
    39  * defined by RFC 2616, section 2.
    40  * It extends that definition somewhat beyond US-ASCII.
    41  *
    42  * @since 4.0
    43  */
    44 public class BasicTokenIterator implements TokenIterator {
    46     /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */
    47     // the order of the characters here is adjusted to put the
    48     // most likely candidates at the beginning of the collection
    49     public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
    52     /** The iterator from which to obtain the next header. */
    53     protected final HeaderIterator headerIt;
    55     /**
    56      * The value of the current header.
    57      * This is the header value that includes {@link #currentToken}.
    58      * Undefined if the iteration is over.
    59      */
    60     protected String currentHeader;
    62     /**
    63      * The token to be returned by the next call to {@link #currentToken}.
    64      * <code>null</code> if the iteration is over.
    65      */
    66     protected String currentToken;
    68     /**
    69      * The position after {@link #currentToken} in {@link #currentHeader}.
    70      * Undefined if the iteration is over.
    71      */
    72     protected int searchPos;
    75     /**
    76      * Creates a new instance of {@link BasicTokenIterator}.
    77      *
    78      * @param headerIterator    the iterator for the headers to tokenize
    79      */
    80     public BasicTokenIterator(final HeaderIterator headerIterator) {
    81         if (headerIterator == null) {
    82             throw new IllegalArgumentException
    83                 ("Header iterator must not be null.");
    84         }
    86         this.headerIt = headerIterator;
    87         this.searchPos = findNext(-1);
    88     }
    91     // non-javadoc, see interface TokenIterator
    92     public boolean hasNext() {
    93         return (this.currentToken != null);
    94     }
    97     /**
    98      * Obtains the next token from this iteration.
    99      *
   100      * @return  the next token in this iteration
   101      *
   102      * @throws NoSuchElementException   if the iteration is already over
   103      * @throws ParseException   if an invalid header value is encountered
   104      */
   105     public String nextToken()
   106         throws NoSuchElementException, ParseException {
   108         if (this.currentToken == null) {
   109             throw new NoSuchElementException("Iteration already finished.");
   110         }
   112         final String result = this.currentToken;
   113         // updates currentToken, may trigger ParseException:
   114         this.searchPos = findNext(this.searchPos);
   116         return result;
   117     }
   120     /**
   121      * Returns the next token.
   122      * Same as {@link #nextToken}, but with generic return type.
   123      *
   124      * @return  the next token in this iteration
   125      *
   126      * @throws NoSuchElementException   if there are no more tokens
   127      * @throws ParseException   if an invalid header value is encountered
   128      */
   129     public final Object next()
   130         throws NoSuchElementException, ParseException {
   131         return nextToken();
   132     }
   135     /**
   136      * Removing tokens is not supported.
   137      *
   138      * @throws UnsupportedOperationException    always
   139      */
   140     public final void remove()
   141         throws UnsupportedOperationException {
   143         throw new UnsupportedOperationException
   144             ("Removing tokens is not supported.");
   145     }
   148     /**
   149      * Determines the next token.
   150      * If found, the token is stored in {@link #currentToken}.
   151      * The return value indicates the position after the token
   152      * in {@link #currentHeader}. If necessary, the next header
   153      * will be obtained from {@link #headerIt}.
   154      * If not found, {@link #currentToken} is set to <code>null</code>.
   155      *
   156      * @param from      the position in the current header at which to
   157      *                  start the search, -1 to search in the first header
   158      *
   159      * @return  the position after the found token in the current header, or
   160      *          negative if there was no next token
   161      *
   162      * @throws ParseException   if an invalid header value is encountered
   163      */
   164     protected int findNext(int from)
   165         throws ParseException {
   167         if (from < 0) {
   168             // called from the constructor, initialize the first header
   169             if (!this.headerIt.hasNext()) {
   170                 return -1;
   171             }
   172             this.currentHeader = this.headerIt.nextHeader().getValue();
   173             from = 0;
   174         } else {
   175             // called after a token, make sure there is a separator
   176             from = findTokenSeparator(from);
   177         }
   179         int start = findTokenStart(from);
   180         if (start < 0) {
   181             this.currentToken = null;
   182             return -1; // nothing found
   183         }
   185         int end = findTokenEnd(start);
   186         this.currentToken = createToken(this.currentHeader, start, end);
   187         return end;
   188     }
   191     /**
   192      * Creates a new token to be returned.
   193      * Called from {@link #findNext findNext} after the token is identified.
   194      * The default implementation simply calls
   195      * {@link java.lang.String#substring String.substring}.
   196      * <br/>
   197      * If header values are significantly longer than tokens, and some
   198      * tokens are permanently referenced by the application, there can
   199      * be problems with garbage collection. A substring will hold a
   200      * reference to the full characters of the original string and
   201      * therefore occupies more memory than might be expected.
   202      * To avoid this, override this method and create a new string
   203      * instead of a substring.
   204      *
   205      * @param value     the full header value from which to create a token
   206      * @param start     the index of the first token character
   207      * @param end       the index after the last token character
   208      *
   209      * @return  a string representing the token identified by the arguments
   210      */
   211     protected String createToken(String value, int start, int end) {
   212         return value.substring(start, end);
   213     }
   216     /**
   217      * Determines the starting position of the next token.
   218      * This method will iterate over headers if necessary.
   219      *
   220      * @param from      the position in the current header at which to
   221      *                  start the search
   222      *
   223      * @return  the position of the token start in the current header,
   224      *          negative if no token start could be found
   225      */
   226     protected int findTokenStart(int from) {
   227         if (from < 0) {
   228             throw new IllegalArgumentException
   229                 ("Search position must not be negative: " + from);
   230         }
   232         boolean found = false;
   233         while (!found && (this.currentHeader != null)) {
   235             final int to = this.currentHeader.length();
   236             while (!found && (from < to)) {
   238                 final char ch = this.currentHeader.charAt(from);
   239                 if (isTokenSeparator(ch) || isWhitespace(ch)) {
   240                     // whitspace and token separators are skipped
   241                     from++;
   242                 } else if (isTokenChar(this.currentHeader.charAt(from))) {
   243                     // found the start of a token
   244                     found = true;
   245                 } else {
   246                     throw new ParseException
   247                         ("Invalid character before token (pos " + from +
   248                          "): " + this.currentHeader);
   249                 }
   250             }
   251             if (!found) {
   252                 if (this.headerIt.hasNext()) {
   253                     this.currentHeader = this.headerIt.nextHeader().getValue();
   254                     from = 0;
   255                 } else {
   256                     this.currentHeader = null;
   257                 }
   258             }
   259         } // while headers
   261         return found ? from : -1;
   262     }
   265     /**
   266      * Determines the position of the next token separator.
   267      * Because of multi-header joining rules, the end of a
   268      * header value is a token separator. This method does
   269      * therefore not need to iterate over headers.
   270      *
   271      * @param from      the position in the current header at which to
   272      *                  start the search
   273      *
   274      * @return  the position of a token separator in the current header,
   275      *          or at the end
   276      *
   277      * @throws ParseException
   278      *         if a new token is found before a token separator.
   279      *         RFC 2616, section 2.1 explicitly requires a comma between
   280      *         tokens for <tt>#</tt>.
   281      */
   282     protected int findTokenSeparator(int from) {
   283         if (from < 0) {
   284             throw new IllegalArgumentException
   285                 ("Search position must not be negative: " + from);
   286         }
   288         boolean found = false;
   289         final int to = this.currentHeader.length();
   290         while (!found && (from < to)) {
   291             final char ch = this.currentHeader.charAt(from);
   292             if (isTokenSeparator(ch)) {
   293                 found = true;
   294             } else if (isWhitespace(ch)) {
   295                 from++;
   296             } else if (isTokenChar(ch)) {
   297                 throw new ParseException
   298                     ("Tokens without separator (pos " + from +
   299                      "): " + this.currentHeader);
   300             } else {
   301                 throw new ParseException
   302                     ("Invalid character after token (pos " + from +
   303                      "): " + this.currentHeader);
   304             }
   305         }
   307         return from;
   308     }
   311     /**
   312      * Determines the ending position of the current token.
   313      * This method will not leave the current header value,
   314      * since the end of the header value is a token boundary.
   315      *
   316      * @param from      the position of the first character of the token
   317      *
   318      * @return  the position after the last character of the token.
   319      *          The behavior is undefined if <code>from</code> does not
   320      *          point to a token character in the current header value.
   321      */
   322     protected int findTokenEnd(int from) {
   323         if (from < 0) {
   324             throw new IllegalArgumentException
   325                 ("Token start position must not be negative: " + from);
   326         }
   328         final int to = this.currentHeader.length();
   329         int end = from+1;
   330         while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {
   331             end++;
   332         }
   334         return end;
   335     }
   338     /**
   339      * Checks whether a character is a token separator.
   340      * RFC 2616, section 2.1 defines comma as the separator for
   341      * <tt>#token</tt> sequences. The end of a header value will
   342      * also separate tokens, but that is not a character check.
   343      *
   344      * @param ch        the character to check
   345      *
   346      * @return  <code>true</code> if the character is a token separator,
   347      *          <code>false</code> otherwise
   348      */
   349     protected boolean isTokenSeparator(char ch) {
   350         return (ch == ',');
   351     }
   354     /**
   355      * Checks whether a character is a whitespace character.
   356      * RFC 2616, section 2.2 defines space and horizontal tab as whitespace.
   357      * The optional preceeding line break is irrelevant, since header
   358      * continuation is handled transparently when parsing messages.
   359      *
   360      * @param ch        the character to check
   361      *
   362      * @return  <code>true</code> if the character is whitespace,
   363      *          <code>false</code> otherwise
   364      */
   365     protected boolean isWhitespace(char ch) {
   367         // we do not use Character.isWhitspace(ch) here, since that allows
   368         // many control characters which are not whitespace as per RFC 2616
   369         return ((ch == '\t') || Character.isSpaceChar(ch));
   370     }
   373     /**
   374      * Checks whether a character is a valid token character.
   375      * Whitespace, control characters, and HTTP separators are not
   376      * valid token characters. The HTTP specification (RFC 2616, section 2.2)
   377      * defines tokens only for the US-ASCII character set, this
   378      * method extends the definition to other character sets.
   379      *
   380      * @param ch        the character to check
   381      *
   382      * @return  <code>true</code> if the character is a valid token start,
   383      *          <code>false</code> otherwise
   384      */
   385     protected boolean isTokenChar(char ch) {
   387         // common sense extension of ALPHA + DIGIT
   388         if (Character.isLetterOrDigit(ch))
   389             return true;
   391         // common sense extension of CTL
   392         if (Character.isISOControl(ch))
   393             return false;
   395         // no common sense extension for this
   396         if (isHttpSeparator(ch))
   397             return false;
   399         // RFC 2616, section 2.2 defines a token character as
   400         // "any CHAR except CTLs or separators". The controls
   401         // and separators are included in the checks above.
   402         // This will yield unexpected results for Unicode format characters.
   403         // If that is a problem, overwrite isHttpSeparator(char) to filter
   404         // out the false positives.
   405         return true;
   406     }
   409     /**
   410      * Checks whether a character is an HTTP separator.
   411      * The implementation in this class checks only for the HTTP separators
   412      * defined in RFC 2616, section 2.2. If you need to detect other
   413      * separators beyond the US-ASCII character set, override this method.
   414      *
   415      * @param ch        the character to check
   416      *
   417      * @return  <code>true</code> if the character is an HTTP separator
   418      */
   419     protected boolean isHttpSeparator(char ch) {
   420         return (HTTP_SEPARATORS.indexOf(ch) >= 0);
   421     }
   424 } // class BasicTokenIterator

mercurial