mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,425 @@
     1.4 +/*
     1.5 + * ====================================================================
     1.6 + * Licensed to the Apache Software Foundation (ASF) under one
     1.7 + * or more contributor license agreements.  See the NOTICE file
     1.8 + * distributed with this work for additional information
     1.9 + * regarding copyright ownership.  The ASF licenses this file
    1.10 + * to you under the Apache License, Version 2.0 (the
    1.11 + * "License"); you may not use this file except in compliance
    1.12 + * with the License.  You may obtain a copy of the License at
    1.13 + *
    1.14 + *   http://www.apache.org/licenses/LICENSE-2.0
    1.15 + *
    1.16 + * Unless required by applicable law or agreed to in writing,
    1.17 + * software distributed under the License is distributed on an
    1.18 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    1.19 + * KIND, either express or implied.  See the License for the
    1.20 + * specific language governing permissions and limitations
    1.21 + * under the License.
    1.22 + * ====================================================================
    1.23 + *
    1.24 + * This software consists of voluntary contributions made by many
    1.25 + * individuals on behalf of the Apache Software Foundation.  For more
    1.26 + * information on the Apache Software Foundation, please see
    1.27 + * <http://www.apache.org/>.
    1.28 + *
    1.29 + */
    1.30 +
    1.31 +package ch.boye.httpclientandroidlib.message;
    1.32 +
    1.33 +import java.util.NoSuchElementException;
    1.34 +
    1.35 +import ch.boye.httpclientandroidlib.HeaderIterator;
    1.36 +import ch.boye.httpclientandroidlib.ParseException;
    1.37 +import ch.boye.httpclientandroidlib.TokenIterator;
    1.38 +
    1.39 +/**
    1.40 + * Basic implementation of a {@link TokenIterator}.
    1.41 + * This implementation parses <tt>#token</tt> sequences as
    1.42 + * defined by RFC 2616, section 2.
    1.43 + * It extends that definition somewhat beyond US-ASCII.
    1.44 + *
    1.45 + * @since 4.0
    1.46 + */
    1.47 +public class BasicTokenIterator implements TokenIterator {
    1.48 +
    1.49 +    /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */
    1.50 +    // the order of the characters here is adjusted to put the
    1.51 +    // most likely candidates at the beginning of the collection
    1.52 +    public static final String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
    1.53 +
    1.54 +
    1.55 +    /** The iterator from which to obtain the next header. */
    1.56 +    protected final HeaderIterator headerIt;
    1.57 +
    1.58 +    /**
    1.59 +     * The value of the current header.
    1.60 +     * This is the header value that includes {@link #currentToken}.
    1.61 +     * Undefined if the iteration is over.
    1.62 +     */
    1.63 +    protected String currentHeader;
    1.64 +
    1.65 +    /**
    1.66 +     * The token to be returned by the next call to {@link #nextToken}.
    1.67 +     * <code>null</code> if the iteration is over.
    1.68 +     */
    1.69 +    protected String currentToken;
    1.70 +
    1.71 +    /**
    1.72 +     * The position after {@link #currentToken} in {@link #currentHeader}.
    1.73 +     * Undefined if the iteration is over.
    1.74 +     */
    1.75 +    protected int searchPos;
    1.76 +
    1.77 +
    1.78 +    /**
    1.79 +     * Creates a new instance of {@link BasicTokenIterator}.
    1.80 +     *
    1.81 +     * @param headerIterator    the iterator for the headers to tokenize
    1.82 +     */
    1.83 +    public BasicTokenIterator(final HeaderIterator headerIterator) {
    1.84 +        if (headerIterator == null) {
    1.85 +            throw new IllegalArgumentException
    1.86 +                ("Header iterator must not be null.");
    1.87 +        }
    1.88 +
    1.89 +        this.headerIt = headerIterator;
    1.90 +        this.searchPos = findNext(-1);
    1.91 +    }
    1.92 +
    1.93 +
    1.94 +    // non-javadoc, see interface TokenIterator
    1.95 +    public boolean hasNext() {
    1.96 +        return (this.currentToken != null);
    1.97 +    }
    1.98 +
    1.99 +
   1.100 +    /**
   1.101 +     * Obtains the next token from this iteration.
   1.102 +     *
   1.103 +     * @return  the next token in this iteration
   1.104 +     *
   1.105 +     * @throws NoSuchElementException   if the iteration is already over
   1.106 +     * @throws ParseException   if an invalid header value is encountered
   1.107 +     */
   1.108 +    public String nextToken()
   1.109 +        throws NoSuchElementException, ParseException {
   1.110 +
   1.111 +        if (this.currentToken == null) {
   1.112 +            throw new NoSuchElementException("Iteration already finished.");
   1.113 +        }
   1.114 +
   1.115 +        final String result = this.currentToken;
   1.116 +        // updates currentToken, may trigger ParseException:
   1.117 +        this.searchPos = findNext(this.searchPos);
   1.118 +
   1.119 +        return result;
   1.120 +    }
   1.121 +
   1.122 +
   1.123 +    /**
   1.124 +     * Returns the next token.
   1.125 +     * Same as {@link #nextToken}, but with generic return type.
   1.126 +     *
   1.127 +     * @return  the next token in this iteration
   1.128 +     *
   1.129 +     * @throws NoSuchElementException   if there are no more tokens
   1.130 +     * @throws ParseException   if an invalid header value is encountered
   1.131 +     */
   1.132 +    public final Object next()
   1.133 +        throws NoSuchElementException, ParseException {
   1.134 +        return nextToken();
   1.135 +    }
   1.136 +
   1.137 +
   1.138 +    /**
   1.139 +     * Removing tokens is not supported.
   1.140 +     *
   1.141 +     * @throws UnsupportedOperationException    always
   1.142 +     */
   1.143 +    public final void remove()
   1.144 +        throws UnsupportedOperationException {
   1.145 +
   1.146 +        throw new UnsupportedOperationException
   1.147 +            ("Removing tokens is not supported.");
   1.148 +    }
   1.149 +
   1.150 +
   1.151 +    /**
   1.152 +     * Determines the next token.
   1.153 +     * If found, the token is stored in {@link #currentToken}.
   1.154 +     * The return value indicates the position after the token
   1.155 +     * in {@link #currentHeader}. If necessary, the next header
   1.156 +     * will be obtained from {@link #headerIt}.
   1.157 +     * If not found, {@link #currentToken} is set to <code>null</code>.
   1.158 +     *
   1.159 +     * @param from      the position in the current header at which to
   1.160 +     *                  start the search, -1 to search in the first header
   1.161 +     *
   1.162 +     * @return  the position after the found token in the current header, or
   1.163 +     *          negative if there was no next token
   1.164 +     *
   1.165 +     * @throws ParseException   if an invalid header value is encountered
   1.166 +     */
   1.167 +    protected int findNext(int from)
   1.168 +        throws ParseException {
   1.169 +
   1.170 +        if (from < 0) {
   1.171 +            // called from the constructor, initialize the first header
   1.172 +            if (!this.headerIt.hasNext()) {
   1.173 +                return -1;
   1.174 +            }
   1.175 +            this.currentHeader = this.headerIt.nextHeader().getValue();
   1.176 +            from = 0;
   1.177 +        } else {
   1.178 +            // called after a token, make sure there is a separator
   1.179 +            from = findTokenSeparator(from);
   1.180 +        }
   1.181 +
   1.182 +        int start = findTokenStart(from);
   1.183 +        if (start < 0) {
   1.184 +            this.currentToken = null;
   1.185 +            return -1; // nothing found
   1.186 +        }
   1.187 +
   1.188 +        int end = findTokenEnd(start);
   1.189 +        this.currentToken = createToken(this.currentHeader, start, end);
   1.190 +        return end;
   1.191 +    }
   1.192 +
   1.193 +
   1.194 +    /**
   1.195 +     * Creates a new token to be returned.
   1.196 +     * Called from {@link #findNext findNext} after the token is identified.
   1.197 +     * The default implementation simply calls
   1.198 +     * {@link java.lang.String#substring String.substring}.
   1.199 +     * <br/>
   1.200 +     * If header values are significantly longer than tokens, and some
   1.201 +     * tokens are permanently referenced by the application, there can
   1.202 +     * be problems with garbage collection. A substring will hold a
   1.203 +     * reference to the full characters of the original string and
   1.204 +     * therefore occupies more memory than might be expected.
   1.205 +     * To avoid this, override this method and create a new string
   1.206 +     * instead of a substring.
   1.207 +     *
   1.208 +     * @param value     the full header value from which to create a token
   1.209 +     * @param start     the index of the first token character
   1.210 +     * @param end       the index after the last token character
   1.211 +     *
   1.212 +     * @return  a string representing the token identified by the arguments
   1.213 +     */
   1.214 +    protected String createToken(String value, int start, int end) {
   1.215 +        return value.substring(start, end);
   1.216 +    }
   1.217 +
   1.218 +
   1.219 +    /**
   1.220 +     * Determines the starting position of the next token.
   1.221 +     * This method will iterate over headers if necessary.
   1.222 +     *
   1.223 +     * @param from      the search start position in the current header
   1.224 +     *
   1.225 +     * @return  the token start in the current header, negative if none
   1.226 +     *
   1.227 +     * @throws ParseException   if an invalid character precedes a token
   1.228 +     */
   1.229 +    protected int findTokenStart(int from) throws ParseException {
   1.230 +        if (from < 0) {
   1.231 +            throw new IllegalArgumentException
   1.232 +                ("Search position must not be negative: " + from);
   1.233 +        }
   1.234 +
   1.235 +        boolean found = false;
   1.236 +        while (!found && (this.currentHeader != null)) {
   1.237 +
   1.238 +            final int to = this.currentHeader.length();
   1.239 +            while (!found && (from < to)) {
   1.240 +
   1.241 +                final char ch = this.currentHeader.charAt(from);
   1.242 +                if (isTokenSeparator(ch) || isWhitespace(ch)) {
   1.243 +                    // whitespace and token separators are skipped
   1.244 +                    from++;
   1.245 +                } else if (isTokenChar(ch)) {
   1.246 +                    // found the start of a token
   1.247 +                    found = true;
   1.248 +                } else {
   1.249 +                    throw new ParseException
   1.250 +                        ("Invalid character before token (pos " + from +
   1.251 +                         "): " + this.currentHeader);
   1.252 +                }
   1.253 +            }
   1.254 +            if (!found) {
   1.255 +                if (this.headerIt.hasNext()) {
   1.256 +                    this.currentHeader = this.headerIt.nextHeader().getValue();
   1.257 +                    from = 0;
   1.258 +                } else {
   1.259 +                    this.currentHeader = null;
   1.260 +                }
   1.261 +            }
   1.262 +        } // while headers
   1.263 +
   1.264 +        return found ? from : -1;
   1.265 +    }
   1.266 +
   1.267 +
   1.268 +    /**
   1.269 +     * Determines the position of the next token separator.
   1.270 +     * Because of multi-header joining rules, the end of a
   1.271 +     * header value is a token separator. This method does
   1.272 +     * therefore not need to iterate over headers.
   1.273 +     *
   1.274 +     * @param from      the position in the current header at which to
   1.275 +     *                  start the search
   1.276 +     *
   1.277 +     * @return  the position of a token separator in the current header,
   1.278 +     *          or at the end
   1.279 +     *
   1.280 +     * @throws ParseException
   1.281 +     *         if a new token is found before a token separator.
   1.282 +     *         RFC 2616, section 2.1 explicitly requires a comma between
   1.283 +     *         tokens for <tt>#</tt>.
   1.284 +     */
   1.285 +    protected int findTokenSeparator(int from) throws ParseException {
   1.286 +        if (from < 0) {
   1.287 +            throw new IllegalArgumentException
   1.288 +                ("Search position must not be negative: " + from);
   1.289 +        }
   1.290 +
   1.291 +        boolean found = false;
   1.292 +        final int to = this.currentHeader.length();
   1.293 +        while (!found && (from < to)) {
   1.294 +            final char ch = this.currentHeader.charAt(from);
   1.295 +            if (isTokenSeparator(ch)) {
   1.296 +                found = true;
   1.297 +            } else if (isWhitespace(ch)) {
   1.298 +                from++;
   1.299 +            } else if (isTokenChar(ch)) {
   1.300 +                throw new ParseException
   1.301 +                    ("Tokens without separator (pos " + from +
   1.302 +                     "): " + this.currentHeader);
   1.303 +            } else {
   1.304 +                throw new ParseException
   1.305 +                    ("Invalid character after token (pos " + from +
   1.306 +                     "): " + this.currentHeader);
   1.307 +            }
   1.308 +        }
   1.309 +
   1.310 +        return from;
   1.311 +    }
   1.312 +
   1.313 +
   1.314 +    /**
   1.315 +     * Determines the ending position of the current token.
   1.316 +     * This method will not leave the current header value,
   1.317 +     * since the end of the header value is a token boundary.
   1.318 +     *
   1.319 +     * @param from      the position of the first character of the token
   1.320 +     *
   1.321 +     * @return  the position after the last character of the token.
   1.322 +     *          The behavior is undefined if <code>from</code> does not
   1.323 +     *          point to a token character in the current header value.
   1.324 +     */
   1.325 +    protected int findTokenEnd(int from) {
   1.326 +        if (from < 0) {
   1.327 +            throw new IllegalArgumentException
   1.328 +                ("Token start position must not be negative: " + from);
   1.329 +        }
   1.330 +
   1.331 +        final int to = this.currentHeader.length();
   1.332 +        int end = from+1;
   1.333 +        while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {
   1.334 +            end++;
   1.335 +        }
   1.336 +
   1.337 +        return end;
   1.338 +    }
   1.339 +
   1.340 +
   1.341 +    /**
   1.342 +     * Checks whether a character is a token separator.
   1.343 +     * RFC 2616, section 2.1 defines comma as the separator for
   1.344 +     * <tt>#token</tt> sequences. The end of a header value will
   1.345 +     * also separate tokens, but that is not a character check.
   1.346 +     *
   1.347 +     * @param ch        the character to check
   1.348 +     *
   1.349 +     * @return  <code>true</code> if the character is a token separator,
   1.350 +     *          <code>false</code> otherwise
   1.351 +     */
   1.352 +    protected boolean isTokenSeparator(char ch) {
   1.353 +        return (ch == ',');
   1.354 +    }
   1.355 +
   1.356 +
   1.357 +    /**
   1.358 +     * Checks whether a character is a whitespace character.
   1.359 +     * RFC 2616, section 2.2 defines space and horizontal tab as whitespace.
   1.360 +     * The optional preceding line break is irrelevant, since header
   1.361 +     * continuation is handled transparently when parsing messages.
   1.362 +     *
   1.363 +     * @param ch        the character to check
   1.364 +     *
   1.365 +     * @return  <code>true</code> if the character is whitespace,
   1.366 +     *          <code>false</code> otherwise
   1.367 +     */
   1.368 +    protected boolean isWhitespace(char ch) {
   1.369 +
   1.370 +        // we do not use Character.isWhitespace(ch) here, since that allows
   1.371 +        // many control characters which are not whitespace as per RFC 2616
   1.372 +        return ((ch == '\t') || Character.isSpaceChar(ch));
   1.373 +    }
   1.374 +
   1.375 +
   1.376 +    /**
   1.377 +     * Checks whether a character is a valid token character.
   1.378 +     * Whitespace, control characters, and HTTP separators are not
   1.379 +     * valid token characters. The HTTP specification (RFC 2616, section 2.2)
   1.380 +     * defines tokens only for the US-ASCII character set, this
   1.381 +     * method extends the definition to other character sets.
   1.382 +     *
   1.383 +     * @param ch        the character to check
   1.384 +     *
   1.385 +     * @return  <code>true</code> if the character is a valid token character,
   1.386 +     *          <code>false</code> otherwise
   1.387 +     */
   1.388 +    protected boolean isTokenChar(char ch) {
   1.389 +
   1.390 +        // common sense extension of ALPHA + DIGIT
   1.391 +        if (Character.isLetterOrDigit(ch))
   1.392 +            return true;
   1.393 +
   1.394 +        // common sense extension of CTL
   1.395 +        if (Character.isISOControl(ch))
   1.396 +            return false;
   1.397 +
   1.398 +        // no common sense extension for this
   1.399 +        if (isHttpSeparator(ch))
   1.400 +            return false;
   1.401 +
   1.402 +        // RFC 2616, section 2.2 defines a token character as
   1.403 +        // "any CHAR except CTLs or separators". The controls
   1.404 +        // and separators are included in the checks above.
   1.405 +        // This will yield unexpected results for Unicode format characters.
   1.406 +        // If that is a problem, override isHttpSeparator(char) to filter
   1.407 +        // out the false positives.
   1.408 +        return true;
   1.409 +    }
   1.410 +
   1.411 +
   1.412 +    /**
   1.413 +     * Checks whether a character is an HTTP separator.
   1.414 +     * The implementation in this class checks only for the HTTP separators
   1.415 +     * defined in RFC 2616, section 2.2. If you need to detect other
   1.416 +     * separators beyond the US-ASCII character set, override this method.
   1.417 +     *
   1.418 +     * @param ch        the character to check
   1.419 +     *
   1.420 +     * @return  <code>true</code> if the character is an HTTP separator
   1.421 +     */
   1.422 +    protected boolean isHttpSeparator(char ch) {
   1.423 +        return (HTTP_SEPARATORS.indexOf(ch) >= 0);
   1.424 +    }
   1.425 +
   1.426 +
   1.427 +} // class BasicTokenIterator
   1.428 +

mercurial