Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* |
michael@0 | 2 | * ==================================================================== |
michael@0 | 3 | * Licensed to the Apache Software Foundation (ASF) under one |
michael@0 | 4 | * or more contributor license agreements. See the NOTICE file |
michael@0 | 5 | * distributed with this work for additional information |
michael@0 | 6 | * regarding copyright ownership. The ASF licenses this file |
michael@0 | 7 | * to you under the Apache License, Version 2.0 (the |
michael@0 | 8 | * "License"); you may not use this file except in compliance |
michael@0 | 9 | * with the License. You may obtain a copy of the License at |
michael@0 | 10 | * |
michael@0 | 11 | * http://www.apache.org/licenses/LICENSE-2.0 |
michael@0 | 12 | * |
michael@0 | 13 | * Unless required by applicable law or agreed to in writing, |
michael@0 | 14 | * software distributed under the License is distributed on an |
michael@0 | 15 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
michael@0 | 16 | * KIND, either express or implied. See the License for the |
michael@0 | 17 | * specific language governing permissions and limitations |
michael@0 | 18 | * under the License. |
michael@0 | 19 | * ==================================================================== |
michael@0 | 20 | * |
michael@0 | 21 | * This software consists of voluntary contributions made by many |
michael@0 | 22 | * individuals on behalf of the Apache Software Foundation. For more |
michael@0 | 23 | * information on the Apache Software Foundation, please see |
michael@0 | 24 | * <http://www.apache.org/>. |
michael@0 | 25 | * |
michael@0 | 26 | */ |
michael@0 | 27 | |
michael@0 | 28 | package ch.boye.httpclientandroidlib.message; |
michael@0 | 29 | |
michael@0 | 30 | import java.util.NoSuchElementException; |
michael@0 | 31 | |
michael@0 | 32 | import ch.boye.httpclientandroidlib.HeaderIterator; |
michael@0 | 33 | import ch.boye.httpclientandroidlib.ParseException; |
michael@0 | 34 | import ch.boye.httpclientandroidlib.TokenIterator; |
michael@0 | 35 | |
michael@0 | 36 | /** |
michael@0 | 37 | * Basic implementation of a {@link TokenIterator}. |
michael@0 | 38 | * This implementation parses <tt>#token<tt> sequences as |
michael@0 | 39 | * defined by RFC 2616, section 2. |
michael@0 | 40 | * It extends that definition somewhat beyond US-ASCII. |
michael@0 | 41 | * |
michael@0 | 42 | * @since 4.0 |
michael@0 | 43 | */ |
michael@0 | 44 | public class BasicTokenIterator implements TokenIterator { |
michael@0 | 45 | |
michael@0 | 46 | /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */ |
michael@0 | 47 | // the order of the characters here is adjusted to put the |
michael@0 | 48 | // most likely candidates at the beginning of the collection |
michael@0 | 49 | public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t"; |
michael@0 | 50 | |
michael@0 | 51 | |
michael@0 | 52 | /** The iterator from which to obtain the next header. */ |
michael@0 | 53 | protected final HeaderIterator headerIt; |
michael@0 | 54 | |
michael@0 | 55 | /** |
michael@0 | 56 | * The value of the current header. |
michael@0 | 57 | * This is the header value that includes {@link #currentToken}. |
michael@0 | 58 | * Undefined if the iteration is over. |
michael@0 | 59 | */ |
michael@0 | 60 | protected String currentHeader; |
michael@0 | 61 | |
michael@0 | 62 | /** |
michael@0 | 63 | * The token to be returned by the next call to {@link #currentToken}. |
michael@0 | 64 | * <code>null</code> if the iteration is over. |
michael@0 | 65 | */ |
michael@0 | 66 | protected String currentToken; |
michael@0 | 67 | |
michael@0 | 68 | /** |
michael@0 | 69 | * The position after {@link #currentToken} in {@link #currentHeader}. |
michael@0 | 70 | * Undefined if the iteration is over. |
michael@0 | 71 | */ |
michael@0 | 72 | protected int searchPos; |
michael@0 | 73 | |
michael@0 | 74 | |
michael@0 | 75 | /** |
michael@0 | 76 | * Creates a new instance of {@link BasicTokenIterator}. |
michael@0 | 77 | * |
michael@0 | 78 | * @param headerIterator the iterator for the headers to tokenize |
michael@0 | 79 | */ |
michael@0 | 80 | public BasicTokenIterator(final HeaderIterator headerIterator) { |
michael@0 | 81 | if (headerIterator == null) { |
michael@0 | 82 | throw new IllegalArgumentException |
michael@0 | 83 | ("Header iterator must not be null."); |
michael@0 | 84 | } |
michael@0 | 85 | |
michael@0 | 86 | this.headerIt = headerIterator; |
michael@0 | 87 | this.searchPos = findNext(-1); |
michael@0 | 88 | } |
michael@0 | 89 | |
michael@0 | 90 | |
michael@0 | 91 | // non-javadoc, see interface TokenIterator |
michael@0 | 92 | public boolean hasNext() { |
michael@0 | 93 | return (this.currentToken != null); |
michael@0 | 94 | } |
michael@0 | 95 | |
michael@0 | 96 | |
michael@0 | 97 | /** |
michael@0 | 98 | * Obtains the next token from this iteration. |
michael@0 | 99 | * |
michael@0 | 100 | * @return the next token in this iteration |
michael@0 | 101 | * |
michael@0 | 102 | * @throws NoSuchElementException if the iteration is already over |
michael@0 | 103 | * @throws ParseException if an invalid header value is encountered |
michael@0 | 104 | */ |
michael@0 | 105 | public String nextToken() |
michael@0 | 106 | throws NoSuchElementException, ParseException { |
michael@0 | 107 | |
michael@0 | 108 | if (this.currentToken == null) { |
michael@0 | 109 | throw new NoSuchElementException("Iteration already finished."); |
michael@0 | 110 | } |
michael@0 | 111 | |
michael@0 | 112 | final String result = this.currentToken; |
michael@0 | 113 | // updates currentToken, may trigger ParseException: |
michael@0 | 114 | this.searchPos = findNext(this.searchPos); |
michael@0 | 115 | |
michael@0 | 116 | return result; |
michael@0 | 117 | } |
michael@0 | 118 | |
michael@0 | 119 | |
michael@0 | 120 | /** |
michael@0 | 121 | * Returns the next token. |
michael@0 | 122 | * Same as {@link #nextToken}, but with generic return type. |
michael@0 | 123 | * |
michael@0 | 124 | * @return the next token in this iteration |
michael@0 | 125 | * |
michael@0 | 126 | * @throws NoSuchElementException if there are no more tokens |
michael@0 | 127 | * @throws ParseException if an invalid header value is encountered |
michael@0 | 128 | */ |
michael@0 | 129 | public final Object next() |
michael@0 | 130 | throws NoSuchElementException, ParseException { |
michael@0 | 131 | return nextToken(); |
michael@0 | 132 | } |
michael@0 | 133 | |
michael@0 | 134 | |
michael@0 | 135 | /** |
michael@0 | 136 | * Removing tokens is not supported. |
michael@0 | 137 | * |
michael@0 | 138 | * @throws UnsupportedOperationException always |
michael@0 | 139 | */ |
michael@0 | 140 | public final void remove() |
michael@0 | 141 | throws UnsupportedOperationException { |
michael@0 | 142 | |
michael@0 | 143 | throw new UnsupportedOperationException |
michael@0 | 144 | ("Removing tokens is not supported."); |
michael@0 | 145 | } |
michael@0 | 146 | |
michael@0 | 147 | |
michael@0 | 148 | /** |
michael@0 | 149 | * Determines the next token. |
michael@0 | 150 | * If found, the token is stored in {@link #currentToken}. |
michael@0 | 151 | * The return value indicates the position after the token |
michael@0 | 152 | * in {@link #currentHeader}. If necessary, the next header |
michael@0 | 153 | * will be obtained from {@link #headerIt}. |
michael@0 | 154 | * If not found, {@link #currentToken} is set to <code>null</code>. |
michael@0 | 155 | * |
michael@0 | 156 | * @param from the position in the current header at which to |
michael@0 | 157 | * start the search, -1 to search in the first header |
michael@0 | 158 | * |
michael@0 | 159 | * @return the position after the found token in the current header, or |
michael@0 | 160 | * negative if there was no next token |
michael@0 | 161 | * |
michael@0 | 162 | * @throws ParseException if an invalid header value is encountered |
michael@0 | 163 | */ |
michael@0 | 164 | protected int findNext(int from) |
michael@0 | 165 | throws ParseException { |
michael@0 | 166 | |
michael@0 | 167 | if (from < 0) { |
michael@0 | 168 | // called from the constructor, initialize the first header |
michael@0 | 169 | if (!this.headerIt.hasNext()) { |
michael@0 | 170 | return -1; |
michael@0 | 171 | } |
michael@0 | 172 | this.currentHeader = this.headerIt.nextHeader().getValue(); |
michael@0 | 173 | from = 0; |
michael@0 | 174 | } else { |
michael@0 | 175 | // called after a token, make sure there is a separator |
michael@0 | 176 | from = findTokenSeparator(from); |
michael@0 | 177 | } |
michael@0 | 178 | |
michael@0 | 179 | int start = findTokenStart(from); |
michael@0 | 180 | if (start < 0) { |
michael@0 | 181 | this.currentToken = null; |
michael@0 | 182 | return -1; // nothing found |
michael@0 | 183 | } |
michael@0 | 184 | |
michael@0 | 185 | int end = findTokenEnd(start); |
michael@0 | 186 | this.currentToken = createToken(this.currentHeader, start, end); |
michael@0 | 187 | return end; |
michael@0 | 188 | } |
michael@0 | 189 | |
michael@0 | 190 | |
michael@0 | 191 | /** |
michael@0 | 192 | * Creates a new token to be returned. |
michael@0 | 193 | * Called from {@link #findNext findNext} after the token is identified. |
michael@0 | 194 | * The default implementation simply calls |
michael@0 | 195 | * {@link java.lang.String#substring String.substring}. |
michael@0 | 196 | * <br/> |
michael@0 | 197 | * If header values are significantly longer than tokens, and some |
michael@0 | 198 | * tokens are permanently referenced by the application, there can |
michael@0 | 199 | * be problems with garbage collection. A substring will hold a |
michael@0 | 200 | * reference to the full characters of the original string and |
michael@0 | 201 | * therefore occupies more memory than might be expected. |
michael@0 | 202 | * To avoid this, override this method and create a new string |
michael@0 | 203 | * instead of a substring. |
michael@0 | 204 | * |
michael@0 | 205 | * @param value the full header value from which to create a token |
michael@0 | 206 | * @param start the index of the first token character |
michael@0 | 207 | * @param end the index after the last token character |
michael@0 | 208 | * |
michael@0 | 209 | * @return a string representing the token identified by the arguments |
michael@0 | 210 | */ |
michael@0 | 211 | protected String createToken(String value, int start, int end) { |
michael@0 | 212 | return value.substring(start, end); |
michael@0 | 213 | } |
michael@0 | 214 | |
michael@0 | 215 | |
michael@0 | 216 | /** |
michael@0 | 217 | * Determines the starting position of the next token. |
michael@0 | 218 | * This method will iterate over headers if necessary. |
michael@0 | 219 | * |
michael@0 | 220 | * @param from the position in the current header at which to |
michael@0 | 221 | * start the search |
michael@0 | 222 | * |
michael@0 | 223 | * @return the position of the token start in the current header, |
michael@0 | 224 | * negative if no token start could be found |
michael@0 | 225 | */ |
michael@0 | 226 | protected int findTokenStart(int from) { |
michael@0 | 227 | if (from < 0) { |
michael@0 | 228 | throw new IllegalArgumentException |
michael@0 | 229 | ("Search position must not be negative: " + from); |
michael@0 | 230 | } |
michael@0 | 231 | |
michael@0 | 232 | boolean found = false; |
michael@0 | 233 | while (!found && (this.currentHeader != null)) { |
michael@0 | 234 | |
michael@0 | 235 | final int to = this.currentHeader.length(); |
michael@0 | 236 | while (!found && (from < to)) { |
michael@0 | 237 | |
michael@0 | 238 | final char ch = this.currentHeader.charAt(from); |
michael@0 | 239 | if (isTokenSeparator(ch) || isWhitespace(ch)) { |
michael@0 | 240 | // whitspace and token separators are skipped |
michael@0 | 241 | from++; |
michael@0 | 242 | } else if (isTokenChar(this.currentHeader.charAt(from))) { |
michael@0 | 243 | // found the start of a token |
michael@0 | 244 | found = true; |
michael@0 | 245 | } else { |
michael@0 | 246 | throw new ParseException |
michael@0 | 247 | ("Invalid character before token (pos " + from + |
michael@0 | 248 | "): " + this.currentHeader); |
michael@0 | 249 | } |
michael@0 | 250 | } |
michael@0 | 251 | if (!found) { |
michael@0 | 252 | if (this.headerIt.hasNext()) { |
michael@0 | 253 | this.currentHeader = this.headerIt.nextHeader().getValue(); |
michael@0 | 254 | from = 0; |
michael@0 | 255 | } else { |
michael@0 | 256 | this.currentHeader = null; |
michael@0 | 257 | } |
michael@0 | 258 | } |
michael@0 | 259 | } // while headers |
michael@0 | 260 | |
michael@0 | 261 | return found ? from : -1; |
michael@0 | 262 | } |
michael@0 | 263 | |
michael@0 | 264 | |
michael@0 | 265 | /** |
michael@0 | 266 | * Determines the position of the next token separator. |
michael@0 | 267 | * Because of multi-header joining rules, the end of a |
michael@0 | 268 | * header value is a token separator. This method does |
michael@0 | 269 | * therefore not need to iterate over headers. |
michael@0 | 270 | * |
michael@0 | 271 | * @param from the position in the current header at which to |
michael@0 | 272 | * start the search |
michael@0 | 273 | * |
michael@0 | 274 | * @return the position of a token separator in the current header, |
michael@0 | 275 | * or at the end |
michael@0 | 276 | * |
michael@0 | 277 | * @throws ParseException |
michael@0 | 278 | * if a new token is found before a token separator. |
michael@0 | 279 | * RFC 2616, section 2.1 explicitly requires a comma between |
michael@0 | 280 | * tokens for <tt>#</tt>. |
michael@0 | 281 | */ |
michael@0 | 282 | protected int findTokenSeparator(int from) { |
michael@0 | 283 | if (from < 0) { |
michael@0 | 284 | throw new IllegalArgumentException |
michael@0 | 285 | ("Search position must not be negative: " + from); |
michael@0 | 286 | } |
michael@0 | 287 | |
michael@0 | 288 | boolean found = false; |
michael@0 | 289 | final int to = this.currentHeader.length(); |
michael@0 | 290 | while (!found && (from < to)) { |
michael@0 | 291 | final char ch = this.currentHeader.charAt(from); |
michael@0 | 292 | if (isTokenSeparator(ch)) { |
michael@0 | 293 | found = true; |
michael@0 | 294 | } else if (isWhitespace(ch)) { |
michael@0 | 295 | from++; |
michael@0 | 296 | } else if (isTokenChar(ch)) { |
michael@0 | 297 | throw new ParseException |
michael@0 | 298 | ("Tokens without separator (pos " + from + |
michael@0 | 299 | "): " + this.currentHeader); |
michael@0 | 300 | } else { |
michael@0 | 301 | throw new ParseException |
michael@0 | 302 | ("Invalid character after token (pos " + from + |
michael@0 | 303 | "): " + this.currentHeader); |
michael@0 | 304 | } |
michael@0 | 305 | } |
michael@0 | 306 | |
michael@0 | 307 | return from; |
michael@0 | 308 | } |
michael@0 | 309 | |
michael@0 | 310 | |
michael@0 | 311 | /** |
michael@0 | 312 | * Determines the ending position of the current token. |
michael@0 | 313 | * This method will not leave the current header value, |
michael@0 | 314 | * since the end of the header value is a token boundary. |
michael@0 | 315 | * |
michael@0 | 316 | * @param from the position of the first character of the token |
michael@0 | 317 | * |
michael@0 | 318 | * @return the position after the last character of the token. |
michael@0 | 319 | * The behavior is undefined if <code>from</code> does not |
michael@0 | 320 | * point to a token character in the current header value. |
michael@0 | 321 | */ |
michael@0 | 322 | protected int findTokenEnd(int from) { |
michael@0 | 323 | if (from < 0) { |
michael@0 | 324 | throw new IllegalArgumentException |
michael@0 | 325 | ("Token start position must not be negative: " + from); |
michael@0 | 326 | } |
michael@0 | 327 | |
michael@0 | 328 | final int to = this.currentHeader.length(); |
michael@0 | 329 | int end = from+1; |
michael@0 | 330 | while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) { |
michael@0 | 331 | end++; |
michael@0 | 332 | } |
michael@0 | 333 | |
michael@0 | 334 | return end; |
michael@0 | 335 | } |
michael@0 | 336 | |
michael@0 | 337 | |
michael@0 | 338 | /** |
michael@0 | 339 | * Checks whether a character is a token separator. |
michael@0 | 340 | * RFC 2616, section 2.1 defines comma as the separator for |
michael@0 | 341 | * <tt>#token</tt> sequences. The end of a header value will |
michael@0 | 342 | * also separate tokens, but that is not a character check. |
michael@0 | 343 | * |
michael@0 | 344 | * @param ch the character to check |
michael@0 | 345 | * |
michael@0 | 346 | * @return <code>true</code> if the character is a token separator, |
michael@0 | 347 | * <code>false</code> otherwise |
michael@0 | 348 | */ |
michael@0 | 349 | protected boolean isTokenSeparator(char ch) { |
michael@0 | 350 | return (ch == ','); |
michael@0 | 351 | } |
michael@0 | 352 | |
michael@0 | 353 | |
michael@0 | 354 | /** |
michael@0 | 355 | * Checks whether a character is a whitespace character. |
michael@0 | 356 | * RFC 2616, section 2.2 defines space and horizontal tab as whitespace. |
michael@0 | 357 | * The optional preceeding line break is irrelevant, since header |
michael@0 | 358 | * continuation is handled transparently when parsing messages. |
michael@0 | 359 | * |
michael@0 | 360 | * @param ch the character to check |
michael@0 | 361 | * |
michael@0 | 362 | * @return <code>true</code> if the character is whitespace, |
michael@0 | 363 | * <code>false</code> otherwise |
michael@0 | 364 | */ |
michael@0 | 365 | protected boolean isWhitespace(char ch) { |
michael@0 | 366 | |
michael@0 | 367 | // we do not use Character.isWhitspace(ch) here, since that allows |
michael@0 | 368 | // many control characters which are not whitespace as per RFC 2616 |
michael@0 | 369 | return ((ch == '\t') || Character.isSpaceChar(ch)); |
michael@0 | 370 | } |
michael@0 | 371 | |
michael@0 | 372 | |
michael@0 | 373 | /** |
michael@0 | 374 | * Checks whether a character is a valid token character. |
michael@0 | 375 | * Whitespace, control characters, and HTTP separators are not |
michael@0 | 376 | * valid token characters. The HTTP specification (RFC 2616, section 2.2) |
michael@0 | 377 | * defines tokens only for the US-ASCII character set, this |
michael@0 | 378 | * method extends the definition to other character sets. |
michael@0 | 379 | * |
michael@0 | 380 | * @param ch the character to check |
michael@0 | 381 | * |
michael@0 | 382 | * @return <code>true</code> if the character is a valid token start, |
michael@0 | 383 | * <code>false</code> otherwise |
michael@0 | 384 | */ |
michael@0 | 385 | protected boolean isTokenChar(char ch) { |
michael@0 | 386 | |
michael@0 | 387 | // common sense extension of ALPHA + DIGIT |
michael@0 | 388 | if (Character.isLetterOrDigit(ch)) |
michael@0 | 389 | return true; |
michael@0 | 390 | |
michael@0 | 391 | // common sense extension of CTL |
michael@0 | 392 | if (Character.isISOControl(ch)) |
michael@0 | 393 | return false; |
michael@0 | 394 | |
michael@0 | 395 | // no common sense extension for this |
michael@0 | 396 | if (isHttpSeparator(ch)) |
michael@0 | 397 | return false; |
michael@0 | 398 | |
michael@0 | 399 | // RFC 2616, section 2.2 defines a token character as |
michael@0 | 400 | // "any CHAR except CTLs or separators". The controls |
michael@0 | 401 | // and separators are included in the checks above. |
michael@0 | 402 | // This will yield unexpected results for Unicode format characters. |
michael@0 | 403 | // If that is a problem, overwrite isHttpSeparator(char) to filter |
michael@0 | 404 | // out the false positives. |
michael@0 | 405 | return true; |
michael@0 | 406 | } |
michael@0 | 407 | |
michael@0 | 408 | |
michael@0 | 409 | /** |
michael@0 | 410 | * Checks whether a character is an HTTP separator. |
michael@0 | 411 | * The implementation in this class checks only for the HTTP separators |
michael@0 | 412 | * defined in RFC 2616, section 2.2. If you need to detect other |
michael@0 | 413 | * separators beyond the US-ASCII character set, override this method. |
michael@0 | 414 | * |
michael@0 | 415 | * @param ch the character to check |
michael@0 | 416 | * |
michael@0 | 417 | * @return <code>true</code> if the character is an HTTP separator |
michael@0 | 418 | */ |
michael@0 | 419 | protected boolean isHttpSeparator(char ch) { |
michael@0 | 420 | return (HTTP_SEPARATORS.indexOf(ch) >= 0); |
michael@0 | 421 | } |
michael@0 | 422 | |
michael@0 | 423 | |
michael@0 | 424 | } // class BasicTokenIterator |
michael@0 | 425 |