mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 * ====================================================================
michael@0 3 * Licensed to the Apache Software Foundation (ASF) under one
michael@0 4 * or more contributor license agreements. See the NOTICE file
michael@0 5 * distributed with this work for additional information
michael@0 6 * regarding copyright ownership. The ASF licenses this file
michael@0 7 * to you under the Apache License, Version 2.0 (the
michael@0 8 * "License"); you may not use this file except in compliance
michael@0 9 * with the License. You may obtain a copy of the License at
michael@0 10 *
michael@0 11 * http://www.apache.org/licenses/LICENSE-2.0
michael@0 12 *
michael@0 13 * Unless required by applicable law or agreed to in writing,
michael@0 14 * software distributed under the License is distributed on an
michael@0 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
michael@0 16 * KIND, either express or implied. See the License for the
michael@0 17 * specific language governing permissions and limitations
michael@0 18 * under the License.
michael@0 19 * ====================================================================
michael@0 20 *
michael@0 21 * This software consists of voluntary contributions made by many
michael@0 22 * individuals on behalf of the Apache Software Foundation. For more
michael@0 23 * information on the Apache Software Foundation, please see
michael@0 24 * <http://www.apache.org/>.
michael@0 25 *
michael@0 26 */
michael@0 27
michael@0 28 package ch.boye.httpclientandroidlib.message;
michael@0 29
michael@0 30 import java.util.NoSuchElementException;
michael@0 31
michael@0 32 import ch.boye.httpclientandroidlib.HeaderIterator;
michael@0 33 import ch.boye.httpclientandroidlib.ParseException;
michael@0 34 import ch.boye.httpclientandroidlib.TokenIterator;
michael@0 35
michael@0 36 /**
michael@0 37 * Basic implementation of a {@link TokenIterator}.
michael@0 38 * This implementation parses <tt>#token<tt> sequences as
michael@0 39 * defined by RFC 2616, section 2.
michael@0 40 * It extends that definition somewhat beyond US-ASCII.
michael@0 41 *
michael@0 42 * @since 4.0
michael@0 43 */
michael@0 44 public class BasicTokenIterator implements TokenIterator {
michael@0 45
michael@0 46 /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */
michael@0 47 // the order of the characters here is adjusted to put the
michael@0 48 // most likely candidates at the beginning of the collection
michael@0 49 public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
michael@0 50
michael@0 51
michael@0 52 /** The iterator from which to obtain the next header. */
michael@0 53 protected final HeaderIterator headerIt;
michael@0 54
michael@0 55 /**
michael@0 56 * The value of the current header.
michael@0 57 * This is the header value that includes {@link #currentToken}.
michael@0 58 * Undefined if the iteration is over.
michael@0 59 */
michael@0 60 protected String currentHeader;
michael@0 61
michael@0 62 /**
michael@0 63 * The token to be returned by the next call to {@link #currentToken}.
michael@0 64 * <code>null</code> if the iteration is over.
michael@0 65 */
michael@0 66 protected String currentToken;
michael@0 67
michael@0 68 /**
michael@0 69 * The position after {@link #currentToken} in {@link #currentHeader}.
michael@0 70 * Undefined if the iteration is over.
michael@0 71 */
michael@0 72 protected int searchPos;
michael@0 73
michael@0 74
michael@0 75 /**
michael@0 76 * Creates a new instance of {@link BasicTokenIterator}.
michael@0 77 *
michael@0 78 * @param headerIterator the iterator for the headers to tokenize
michael@0 79 */
michael@0 80 public BasicTokenIterator(final HeaderIterator headerIterator) {
michael@0 81 if (headerIterator == null) {
michael@0 82 throw new IllegalArgumentException
michael@0 83 ("Header iterator must not be null.");
michael@0 84 }
michael@0 85
michael@0 86 this.headerIt = headerIterator;
michael@0 87 this.searchPos = findNext(-1);
michael@0 88 }
michael@0 89
michael@0 90
michael@0 91 // non-javadoc, see interface TokenIterator
michael@0 92 public boolean hasNext() {
michael@0 93 return (this.currentToken != null);
michael@0 94 }
michael@0 95
michael@0 96
michael@0 97 /**
michael@0 98 * Obtains the next token from this iteration.
michael@0 99 *
michael@0 100 * @return the next token in this iteration
michael@0 101 *
michael@0 102 * @throws NoSuchElementException if the iteration is already over
michael@0 103 * @throws ParseException if an invalid header value is encountered
michael@0 104 */
michael@0 105 public String nextToken()
michael@0 106 throws NoSuchElementException, ParseException {
michael@0 107
michael@0 108 if (this.currentToken == null) {
michael@0 109 throw new NoSuchElementException("Iteration already finished.");
michael@0 110 }
michael@0 111
michael@0 112 final String result = this.currentToken;
michael@0 113 // updates currentToken, may trigger ParseException:
michael@0 114 this.searchPos = findNext(this.searchPos);
michael@0 115
michael@0 116 return result;
michael@0 117 }
michael@0 118
michael@0 119
michael@0 120 /**
michael@0 121 * Returns the next token.
michael@0 122 * Same as {@link #nextToken}, but with generic return type.
michael@0 123 *
michael@0 124 * @return the next token in this iteration
michael@0 125 *
michael@0 126 * @throws NoSuchElementException if there are no more tokens
michael@0 127 * @throws ParseException if an invalid header value is encountered
michael@0 128 */
michael@0 129 public final Object next()
michael@0 130 throws NoSuchElementException, ParseException {
michael@0 131 return nextToken();
michael@0 132 }
michael@0 133
michael@0 134
michael@0 135 /**
michael@0 136 * Removing tokens is not supported.
michael@0 137 *
michael@0 138 * @throws UnsupportedOperationException always
michael@0 139 */
michael@0 140 public final void remove()
michael@0 141 throws UnsupportedOperationException {
michael@0 142
michael@0 143 throw new UnsupportedOperationException
michael@0 144 ("Removing tokens is not supported.");
michael@0 145 }
michael@0 146
michael@0 147
michael@0 148 /**
michael@0 149 * Determines the next token.
michael@0 150 * If found, the token is stored in {@link #currentToken}.
michael@0 151 * The return value indicates the position after the token
michael@0 152 * in {@link #currentHeader}. If necessary, the next header
michael@0 153 * will be obtained from {@link #headerIt}.
michael@0 154 * If not found, {@link #currentToken} is set to <code>null</code>.
michael@0 155 *
michael@0 156 * @param from the position in the current header at which to
michael@0 157 * start the search, -1 to search in the first header
michael@0 158 *
michael@0 159 * @return the position after the found token in the current header, or
michael@0 160 * negative if there was no next token
michael@0 161 *
michael@0 162 * @throws ParseException if an invalid header value is encountered
michael@0 163 */
michael@0 164 protected int findNext(int from)
michael@0 165 throws ParseException {
michael@0 166
michael@0 167 if (from < 0) {
michael@0 168 // called from the constructor, initialize the first header
michael@0 169 if (!this.headerIt.hasNext()) {
michael@0 170 return -1;
michael@0 171 }
michael@0 172 this.currentHeader = this.headerIt.nextHeader().getValue();
michael@0 173 from = 0;
michael@0 174 } else {
michael@0 175 // called after a token, make sure there is a separator
michael@0 176 from = findTokenSeparator(from);
michael@0 177 }
michael@0 178
michael@0 179 int start = findTokenStart(from);
michael@0 180 if (start < 0) {
michael@0 181 this.currentToken = null;
michael@0 182 return -1; // nothing found
michael@0 183 }
michael@0 184
michael@0 185 int end = findTokenEnd(start);
michael@0 186 this.currentToken = createToken(this.currentHeader, start, end);
michael@0 187 return end;
michael@0 188 }
michael@0 189
michael@0 190
michael@0 191 /**
michael@0 192 * Creates a new token to be returned.
michael@0 193 * Called from {@link #findNext findNext} after the token is identified.
michael@0 194 * The default implementation simply calls
michael@0 195 * {@link java.lang.String#substring String.substring}.
michael@0 196 * <br/>
michael@0 197 * If header values are significantly longer than tokens, and some
michael@0 198 * tokens are permanently referenced by the application, there can
michael@0 199 * be problems with garbage collection. A substring will hold a
michael@0 200 * reference to the full characters of the original string and
michael@0 201 * therefore occupies more memory than might be expected.
michael@0 202 * To avoid this, override this method and create a new string
michael@0 203 * instead of a substring.
michael@0 204 *
michael@0 205 * @param value the full header value from which to create a token
michael@0 206 * @param start the index of the first token character
michael@0 207 * @param end the index after the last token character
michael@0 208 *
michael@0 209 * @return a string representing the token identified by the arguments
michael@0 210 */
michael@0 211 protected String createToken(String value, int start, int end) {
michael@0 212 return value.substring(start, end);
michael@0 213 }
michael@0 214
michael@0 215
michael@0 216 /**
michael@0 217 * Determines the starting position of the next token.
michael@0 218 * This method will iterate over headers if necessary.
michael@0 219 *
michael@0 220 * @param from the position in the current header at which to
michael@0 221 * start the search
michael@0 222 *
michael@0 223 * @return the position of the token start in the current header,
michael@0 224 * negative if no token start could be found
michael@0 225 */
michael@0 226 protected int findTokenStart(int from) {
michael@0 227 if (from < 0) {
michael@0 228 throw new IllegalArgumentException
michael@0 229 ("Search position must not be negative: " + from);
michael@0 230 }
michael@0 231
michael@0 232 boolean found = false;
michael@0 233 while (!found && (this.currentHeader != null)) {
michael@0 234
michael@0 235 final int to = this.currentHeader.length();
michael@0 236 while (!found && (from < to)) {
michael@0 237
michael@0 238 final char ch = this.currentHeader.charAt(from);
michael@0 239 if (isTokenSeparator(ch) || isWhitespace(ch)) {
michael@0 240 // whitspace and token separators are skipped
michael@0 241 from++;
michael@0 242 } else if (isTokenChar(this.currentHeader.charAt(from))) {
michael@0 243 // found the start of a token
michael@0 244 found = true;
michael@0 245 } else {
michael@0 246 throw new ParseException
michael@0 247 ("Invalid character before token (pos " + from +
michael@0 248 "): " + this.currentHeader);
michael@0 249 }
michael@0 250 }
michael@0 251 if (!found) {
michael@0 252 if (this.headerIt.hasNext()) {
michael@0 253 this.currentHeader = this.headerIt.nextHeader().getValue();
michael@0 254 from = 0;
michael@0 255 } else {
michael@0 256 this.currentHeader = null;
michael@0 257 }
michael@0 258 }
michael@0 259 } // while headers
michael@0 260
michael@0 261 return found ? from : -1;
michael@0 262 }
michael@0 263
michael@0 264
michael@0 265 /**
michael@0 266 * Determines the position of the next token separator.
michael@0 267 * Because of multi-header joining rules, the end of a
michael@0 268 * header value is a token separator. This method does
michael@0 269 * therefore not need to iterate over headers.
michael@0 270 *
michael@0 271 * @param from the position in the current header at which to
michael@0 272 * start the search
michael@0 273 *
michael@0 274 * @return the position of a token separator in the current header,
michael@0 275 * or at the end
michael@0 276 *
michael@0 277 * @throws ParseException
michael@0 278 * if a new token is found before a token separator.
michael@0 279 * RFC 2616, section 2.1 explicitly requires a comma between
michael@0 280 * tokens for <tt>#</tt>.
michael@0 281 */
michael@0 282 protected int findTokenSeparator(int from) {
michael@0 283 if (from < 0) {
michael@0 284 throw new IllegalArgumentException
michael@0 285 ("Search position must not be negative: " + from);
michael@0 286 }
michael@0 287
michael@0 288 boolean found = false;
michael@0 289 final int to = this.currentHeader.length();
michael@0 290 while (!found && (from < to)) {
michael@0 291 final char ch = this.currentHeader.charAt(from);
michael@0 292 if (isTokenSeparator(ch)) {
michael@0 293 found = true;
michael@0 294 } else if (isWhitespace(ch)) {
michael@0 295 from++;
michael@0 296 } else if (isTokenChar(ch)) {
michael@0 297 throw new ParseException
michael@0 298 ("Tokens without separator (pos " + from +
michael@0 299 "): " + this.currentHeader);
michael@0 300 } else {
michael@0 301 throw new ParseException
michael@0 302 ("Invalid character after token (pos " + from +
michael@0 303 "): " + this.currentHeader);
michael@0 304 }
michael@0 305 }
michael@0 306
michael@0 307 return from;
michael@0 308 }
michael@0 309
michael@0 310
michael@0 311 /**
michael@0 312 * Determines the ending position of the current token.
michael@0 313 * This method will not leave the current header value,
michael@0 314 * since the end of the header value is a token boundary.
michael@0 315 *
michael@0 316 * @param from the position of the first character of the token
michael@0 317 *
michael@0 318 * @return the position after the last character of the token.
michael@0 319 * The behavior is undefined if <code>from</code> does not
michael@0 320 * point to a token character in the current header value.
michael@0 321 */
michael@0 322 protected int findTokenEnd(int from) {
michael@0 323 if (from < 0) {
michael@0 324 throw new IllegalArgumentException
michael@0 325 ("Token start position must not be negative: " + from);
michael@0 326 }
michael@0 327
michael@0 328 final int to = this.currentHeader.length();
michael@0 329 int end = from+1;
michael@0 330 while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {
michael@0 331 end++;
michael@0 332 }
michael@0 333
michael@0 334 return end;
michael@0 335 }
michael@0 336
michael@0 337
michael@0 338 /**
michael@0 339 * Checks whether a character is a token separator.
michael@0 340 * RFC 2616, section 2.1 defines comma as the separator for
michael@0 341 * <tt>#token</tt> sequences. The end of a header value will
michael@0 342 * also separate tokens, but that is not a character check.
michael@0 343 *
michael@0 344 * @param ch the character to check
michael@0 345 *
michael@0 346 * @return <code>true</code> if the character is a token separator,
michael@0 347 * <code>false</code> otherwise
michael@0 348 */
michael@0 349 protected boolean isTokenSeparator(char ch) {
michael@0 350 return (ch == ',');
michael@0 351 }
michael@0 352
michael@0 353
michael@0 354 /**
michael@0 355 * Checks whether a character is a whitespace character.
michael@0 356 * RFC 2616, section 2.2 defines space and horizontal tab as whitespace.
michael@0 357 * The optional preceeding line break is irrelevant, since header
michael@0 358 * continuation is handled transparently when parsing messages.
michael@0 359 *
michael@0 360 * @param ch the character to check
michael@0 361 *
michael@0 362 * @return <code>true</code> if the character is whitespace,
michael@0 363 * <code>false</code> otherwise
michael@0 364 */
michael@0 365 protected boolean isWhitespace(char ch) {
michael@0 366
michael@0 367 // we do not use Character.isWhitspace(ch) here, since that allows
michael@0 368 // many control characters which are not whitespace as per RFC 2616
michael@0 369 return ((ch == '\t') || Character.isSpaceChar(ch));
michael@0 370 }
michael@0 371
michael@0 372
michael@0 373 /**
michael@0 374 * Checks whether a character is a valid token character.
michael@0 375 * Whitespace, control characters, and HTTP separators are not
michael@0 376 * valid token characters. The HTTP specification (RFC 2616, section 2.2)
michael@0 377 * defines tokens only for the US-ASCII character set, this
michael@0 378 * method extends the definition to other character sets.
michael@0 379 *
michael@0 380 * @param ch the character to check
michael@0 381 *
michael@0 382 * @return <code>true</code> if the character is a valid token start,
michael@0 383 * <code>false</code> otherwise
michael@0 384 */
michael@0 385 protected boolean isTokenChar(char ch) {
michael@0 386
michael@0 387 // common sense extension of ALPHA + DIGIT
michael@0 388 if (Character.isLetterOrDigit(ch))
michael@0 389 return true;
michael@0 390
michael@0 391 // common sense extension of CTL
michael@0 392 if (Character.isISOControl(ch))
michael@0 393 return false;
michael@0 394
michael@0 395 // no common sense extension for this
michael@0 396 if (isHttpSeparator(ch))
michael@0 397 return false;
michael@0 398
michael@0 399 // RFC 2616, section 2.2 defines a token character as
michael@0 400 // "any CHAR except CTLs or separators". The controls
michael@0 401 // and separators are included in the checks above.
michael@0 402 // This will yield unexpected results for Unicode format characters.
michael@0 403 // If that is a problem, overwrite isHttpSeparator(char) to filter
michael@0 404 // out the false positives.
michael@0 405 return true;
michael@0 406 }
michael@0 407
michael@0 408
michael@0 409 /**
michael@0 410 * Checks whether a character is an HTTP separator.
michael@0 411 * The implementation in this class checks only for the HTTP separators
michael@0 412 * defined in RFC 2616, section 2.2. If you need to detect other
michael@0 413 * separators beyond the US-ASCII character set, override this method.
michael@0 414 *
michael@0 415 * @param ch the character to check
michael@0 416 *
michael@0 417 * @return <code>true</code> if the character is an HTTP separator
michael@0 418 */
michael@0 419 protected boolean isHttpSeparator(char ch) {
michael@0 420 return (HTTP_SEPARATORS.indexOf(ch) >= 0);
michael@0 421 }
michael@0 422
michael@0 423
michael@0 424 } // class BasicTokenIterator
michael@0 425

mercurial