The Tor Browser: comparison mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java

--1:000000000000
+:c26845e1cf9e
+/*
+* ====================================================================
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+* ====================================================================
+*
+* This software consists of voluntary contributions made by many
+* individuals on behalf of the Apache Software Foundation.  For more
+* information on the Apache Software Foundation, please see
+* <http://www.apache.org/>.
+*
+*/
+package ch.boye.httpclientandroidlib.message;
+import java.util.NoSuchElementException;
+import ch.boye.httpclientandroidlib.HeaderIterator;
+import ch.boye.httpclientandroidlib.ParseException;
+import ch.boye.httpclientandroidlib.TokenIterator;
+/**
+* Basic implementation of a {@link TokenIterator}.
+* This implementation parses <tt>#token</tt> sequences as
+* defined by RFC 2616, section 2.
+* It extends that definition somewhat beyond US-ASCII.
+*
+* @since 4.0
+*/
+public class BasicTokenIterator implements TokenIterator {
+/** The HTTP separator characters. Defined in RFC 2616, section 2.2.
+* This is the set consulted by {@link #isHttpSeparator}. */
+// the order of the characters here is adjusted to put the
+// most likely candidates at the beginning of the collection
+public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
+/** The iterator from which to obtain the next header. */
+protected final HeaderIterator headerIt;
+/**
+* The value of the current header.
+* This is the header value that includes {@link #currentToken}.
+* Undefined if the iteration is over.
+*/
+protected String currentHeader;
+/**
+* The token to be returned by the next call to {@link #nextToken}.
+* <code>null</code> if the iteration is over.
+*/
+protected String currentToken;
+/**
+* The position after {@link #currentToken} in {@link #currentHeader}.
+* Undefined if the iteration is over.
+*/
+protected int searchPos;
+/**
+* Creates a new instance of {@link BasicTokenIterator}.
+*
+* @param headerIterator    the iterator for the headers to tokenize
+*/
+public BasicTokenIterator(final HeaderIterator headerIterator) {
+if (headerIterator == null) {
+throw new IllegalArgumentException
+("Header iterator must not be null.");
+}
+this.headerIt = headerIterator;
+this.searchPos = findNext(-1);
+}
+/**
+ * Tells whether another token is available.
+ * Specified by interface TokenIterator.
+ *
+ * @return  <code>true</code> if {@link #currentToken} holds a token,
+ *          <code>false</code> if it is <code>null</code>
+ */
+public boolean hasNext() {
+    final boolean exhausted = (this.currentToken == null);
+    return !exhausted;
+}
+/**
+ * Returns the token that was found by the previous look-ahead and
+ * advances the look-ahead to the token after it.
+ *
+ * @return  the next token in this iteration
+ *
+ * @throws NoSuchElementException   if the iteration is already over
+ * @throws ParseException   if an invalid header value is encountered
+ *                          while looking for the following token
+ */
+public String nextToken()
+    throws NoSuchElementException, ParseException {
+    final String pending = this.currentToken;
+    if (pending == null) {
+        throw new NoSuchElementException("Iteration already finished.");
+    }
+    // replaces currentToken with the following token (or null);
+    // this is the step that may raise a ParseException
+    final int afterNextToken = findNext(this.searchPos);
+    this.searchPos = afterNextToken;
+    return pending;
+}
+/**
+ * Generic-iterator flavour of {@link #nextToken}: delegates to it and
+ * hands the token back typed as <code>Object</code>.
+ *
+ * @return  the next token in this iteration
+ *
+ * @throws NoSuchElementException   if there are no more tokens
+ * @throws ParseException   if an invalid header value is encountered
+ */
+public final Object next()
+    throws NoSuchElementException, ParseException {
+    final String token = nextToken();
+    return token;
+}
+/**
+ * Unsupported operation: tokens cannot be removed through this iterator.
+ *
+ * @throws UnsupportedOperationException    always
+ */
+public final void remove()
+    throws UnsupportedOperationException {
+    final String reason = "Removing tokens is not supported.";
+    throw new UnsupportedOperationException(reason);
+}
+/**
+* Determines the next token.
+* If found, the token is stored in {@link #currentToken}.
+* The return value indicates the position after the token
+* in {@link #currentHeader}. If necessary, the next header
+* will be obtained from {@link #headerIt}.
+* If not found, {@link #currentToken} is set to <code>null</code>.
+*
+* @param from      the position in the current header at which to
+*                  start the search, -1 to search in the first header
+*
+* @return  the position after the found token in the current header, or
+*          negative if there was no next token
+*
+* @throws ParseException   if an invalid header value is encountered
+*/
+protected int findNext(int from)
+throws ParseException {
+if (from < 0) {
+// called from the constructor, initialize the first header
+if (!this.headerIt.hasNext()) {
+return -1;
+}
+this.currentHeader = this.headerIt.nextHeader().getValue();
+from = 0;
+} else {
+// called after a token, make sure there is a separator
+from = findTokenSeparator(from);
+}
+int start = findTokenStart(from);
+if (start < 0) {
+this.currentToken = null;
+return -1; // nothing found
+}
+int end = findTokenEnd(start);
+this.currentToken = createToken(this.currentHeader, start, end);
+return end;
+}
+/**
+* Creates a new token to be returned.
+* Called from {@link #findNext findNext} after the token is identified.
+* The default implementation simply calls
+* {@link java.lang.String#substring String.substring}.
+* <br/>
+* If header values are significantly longer than tokens, and some
+* tokens are permanently referenced by the application, there can
+* be problems with garbage collection. A substring will hold a
+* reference to the full characters of the original string and
+* therefore occupies more memory than might be expected.
+* To avoid this, override this method and create a new string
+* instead of a substring.
+*
+* @param value     the full header value from which to create a token
+* @param start     the index of the first token character
+* @param end       the index after the last token character
+*
+* @return  a string representing the token identified by the arguments
+*/
+protected String createToken(String value, int start, int end) {
+return value.substring(start, end);
+}
+/**
+ * Searches for the first character of the next token.
+ * Token separators and whitespace are skipped. When the current
+ * header value is used up, the next header is fetched from
+ * {@link #headerIt} and the search continues at its beginning.
+ * If no header is left, {@link #currentHeader} is set to
+ * <code>null</code>.
+ * <br/>
+ * NOTE(review): a header whose value is <code>null</code> also ends
+ * the search, even if further headers remain -- confirm that this
+ * is intended.
+ *
+ * @param from      the position in the current header at which to
+ *                  start the search
+ *
+ * @return  the position of the token start in the current header,
+ *          negative if no token start could be found
+ *
+ * @throws IllegalArgumentException    if <code>from</code> is negative
+ * @throws ParseException   if a character is found that is neither
+ *                          skippable nor a valid token character
+ */
+protected int findTokenStart(int from) {
+    if (from < 0) {
+        throw new IllegalArgumentException
+            ("Search position must not be negative: " + from);
+    }
+    int pos = from;
+    while (this.currentHeader != null) {
+        final int length = this.currentHeader.length();
+        for (; pos < length; pos++) {
+            final char ch = this.currentHeader.charAt(pos);
+            if (isTokenSeparator(ch) || isWhitespace(ch)) {
+                // whitespace and token separators are skipped
+                continue;
+            }
+            if (isTokenChar(ch)) {
+                // found the start of a token
+                return pos;
+            }
+            throw new ParseException
+                ("Invalid character before token (pos " + pos +
+                 "): " + this.currentHeader);
+        }
+        // current header value is used up, move on to the next one
+        if (this.headerIt.hasNext()) {
+            this.currentHeader = this.headerIt.nextHeader().getValue();
+            pos = 0;
+        } else {
+            this.currentHeader = null;
+        }
+    }
+    return -1;
+}
+/**
+ * Searches for the token separator that follows a token.
+ * Only whitespace may appear between the start position and the
+ * separator. The end of the header value counts as a separator,
+ * so this method stays within the current header value.
+ *
+ * @param from      the position in the current header at which to
+ *                  start the search
+ *
+ * @return  the position of a token separator in the current header,
+ *          or the length of the header value if it ends first
+ *
+ * @throws IllegalArgumentException    if <code>from</code> is negative
+ * @throws ParseException
+ *         if a new token is found before a token separator
+ *         (RFC 2616, section 2.1 explicitly requires a comma between
+ *         tokens for <tt>#</tt>), or if a character is found that is
+ *         neither separator, whitespace, nor token character
+ */
+protected int findTokenSeparator(int from) {
+    if (from < 0) {
+        throw new IllegalArgumentException
+            ("Search position must not be negative: " + from);
+    }
+    final int length = this.currentHeader.length();
+    int pos = from;
+    while (pos < length) {
+        final char ch = this.currentHeader.charAt(pos);
+        if (isTokenSeparator(ch)) {
+            // separator reached, report its position
+            break;
+        }
+        if (!isWhitespace(ch)) {
+            if (isTokenChar(ch)) {
+                throw new ParseException
+                    ("Tokens without separator (pos " + pos +
+                     "): " + this.currentHeader);
+            }
+            throw new ParseException
+                ("Invalid character after token (pos " + pos +
+                 "): " + this.currentHeader);
+        }
+        pos++;
+    }
+    return pos;
+}
+/**
+ * Finds where the token starting at the given position ends.
+ * The scan stops at the first character that is not a token
+ * character, or at the end of the current header value; it never
+ * moves on to another header.
+ *
+ * @param from      the position of the first character of the token
+ *
+ * @return  the position after the last character of the token.
+ *          The character at <code>from</code> itself is not checked,
+ *          so the result is only meaningful if it is a token
+ *          character in the current header value.
+ *
+ * @throws IllegalArgumentException    if <code>from</code> is negative
+ */
+protected int findTokenEnd(int from) {
+    if (from < 0) {
+        throw new IllegalArgumentException
+            ("Token start position must not be negative: " + from);
+    }
+    final int length = this.currentHeader.length();
+    int end = from + 1;
+    for (; end < length; end++) {
+        if (!isTokenChar(this.currentHeader.charAt(end))) {
+            break;
+        }
+    }
+    return end;
+}
+/**
+ * Tells whether a character separates tokens.
+ * The comma is the only such character; RFC 2616, section 2.1
+ * defines it as the separator for <tt>#token</tt> sequences.
+ * The end of a header value separates tokens too, but that cannot
+ * be expressed as a check on a character.
+ *
+ * @param ch        the character to check
+ *
+ * @return  <code>true</code> if the character is a token separator,
+ *          <code>false</code> otherwise
+ */
+protected boolean isTokenSeparator(char ch) {
+    return ',' == ch;
+}
+/**
+ * Tells whether a character counts as whitespace.
+ * RFC 2616, section 2.2 names space and horizontal tab as whitespace.
+ * This implementation accepts the tab and every character for which
+ * <code>Character.isSpaceChar</code> returns <code>true</code>.
+ * The optional preceding line break is irrelevant, since header
+ * continuation is handled transparently when parsing messages.
+ *
+ * @param ch        the character to check
+ *
+ * @return  <code>true</code> if the character is whitespace,
+ *          <code>false</code> otherwise
+ */
+protected boolean isWhitespace(char ch) {
+    // Character.isWhitespace(ch) is deliberately not used: it accepts
+    // many control characters that are not whitespace per RFC 2616
+    if (ch == '\t') {
+        return true;
+    }
+    return Character.isSpaceChar(ch);
+}
+/**
+* Checks whether a character is a valid token character.
+* Whitespace, control characters, and HTTP separators are not
+* valid token characters. The HTTP specification (RFC 2616, section 2.2)
+* defines tokens only for the US-ASCII character set, this
+* method extends the definition to other character sets.
+*
+* @param ch        the character to check
+*
+* @return  <code>true</code> if the character is a valid token start,
+*          <code>false</code> otherwise
+*/
+protected boolean isTokenChar(char ch) {
+// common sense extension of ALPHA + DIGIT
+if (Character.isLetterOrDigit(ch))
+return true;
+// common sense extension of CTL
+if (Character.isISOControl(ch))
+return false;
+// no common sense extension for this
+if (isHttpSeparator(ch))
+return false;
+// RFC 2616, section 2.2 defines a token character as
+// "any CHAR except CTLs or separators". The controls
+// and separators are included in the checks above.
+// This will yield unexpected results for Unicode format characters.
+// If that is a problem, overwrite isHttpSeparator(char) to filter
+// out the false positives.
+return true;
+}
+/**
+* Checks whether a character is an HTTP separator.
+* The implementation in this class checks only for the HTTP separators
+* defined in RFC 2616, section 2.2. If you need to detect other
+* separators beyond the US-ASCII character set, override this method.
+*
+* @param ch        the character to check
+*
+* @return  <code>true</code> if the character is an HTTP separator
+*/
+protected boolean isHttpSeparator(char ch) {
+return (HTTP_SEPARATORS.indexOf(ch) >= 0);
+}
+} // class BasicTokenIterator

The Tor Browser / file comparison

comparison: mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java

mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java