1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/mobile/android/thirdparty/ch/boye/httpclientandroidlib/message/BasicTokenIterator.java Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,425 @@ 1.4 +/* 1.5 + * ==================================================================== 1.6 + * Licensed to the Apache Software Foundation (ASF) under one 1.7 + * or more contributor license agreements. See the NOTICE file 1.8 + * distributed with this work for additional information 1.9 + * regarding copyright ownership. The ASF licenses this file 1.10 + * to you under the Apache License, Version 2.0 (the 1.11 + * "License"); you may not use this file except in compliance 1.12 + * with the License. You may obtain a copy of the License at 1.13 + * 1.14 + * http://www.apache.org/licenses/LICENSE-2.0 1.15 + * 1.16 + * Unless required by applicable law or agreed to in writing, 1.17 + * software distributed under the License is distributed on an 1.18 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 1.19 + * KIND, either express or implied. See the License for the 1.20 + * specific language governing permissions and limitations 1.21 + * under the License. 1.22 + * ==================================================================== 1.23 + * 1.24 + * This software consists of voluntary contributions made by many 1.25 + * individuals on behalf of the Apache Software Foundation. For more 1.26 + * information on the Apache Software Foundation, please see 1.27 + * <http://www.apache.org/>. 1.28 + * 1.29 + */ 1.30 + 1.31 +package ch.boye.httpclientandroidlib.message; 1.32 + 1.33 +import java.util.NoSuchElementException; 1.34 + 1.35 +import ch.boye.httpclientandroidlib.HeaderIterator; 1.36 +import ch.boye.httpclientandroidlib.ParseException; 1.37 +import ch.boye.httpclientandroidlib.TokenIterator; 1.38 + 1.39 +/** 1.40 + * Basic implementation of a {@link TokenIterator}. 1.41 + * This implementation parses <tt>#token<tt> sequences as 1.42 + * defined by RFC 2616, section 2. 1.43 + * It extends that definition somewhat beyond US-ASCII. 1.44 + * 1.45 + * @since 4.0 1.46 + */ 1.47 +public class BasicTokenIterator implements TokenIterator { 1.48 + 1.49 + /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */ 1.50 + // the order of the characters here is adjusted to put the 1.51 + // most likely candidates at the beginning of the collection 1.52 + public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t"; 1.53 + 1.54 + 1.55 + /** The iterator from which to obtain the next header. */ 1.56 + protected final HeaderIterator headerIt; 1.57 + 1.58 + /** 1.59 + * The value of the current header. 1.60 + * This is the header value that includes {@link #currentToken}. 1.61 + * Undefined if the iteration is over. 1.62 + */ 1.63 + protected String currentHeader; 1.64 + 1.65 + /** 1.66 + * The token to be returned by the next call to {@link #currentToken}. 1.67 + * <code>null</code> if the iteration is over. 1.68 + */ 1.69 + protected String currentToken; 1.70 + 1.71 + /** 1.72 + * The position after {@link #currentToken} in {@link #currentHeader}. 1.73 + * Undefined if the iteration is over. 1.74 + */ 1.75 + protected int searchPos; 1.76 + 1.77 + 1.78 + /** 1.79 + * Creates a new instance of {@link BasicTokenIterator}. 1.80 + * 1.81 + * @param headerIterator the iterator for the headers to tokenize 1.82 + */ 1.83 + public BasicTokenIterator(final HeaderIterator headerIterator) { 1.84 + if (headerIterator == null) { 1.85 + throw new IllegalArgumentException 1.86 + ("Header iterator must not be null."); 1.87 + } 1.88 + 1.89 + this.headerIt = headerIterator; 1.90 + this.searchPos = findNext(-1); 1.91 + } 1.92 + 1.93 + 1.94 + // non-javadoc, see interface TokenIterator 1.95 + public boolean hasNext() { 1.96 + return (this.currentToken != null); 1.97 + } 1.98 + 1.99 + 1.100 + /** 1.101 + * Obtains the next token from this iteration. 1.102 + * 1.103 + * @return the next token in this iteration 1.104 + * 1.105 + * @throws NoSuchElementException if the iteration is already over 1.106 + * @throws ParseException if an invalid header value is encountered 1.107 + */ 1.108 + public String nextToken() 1.109 + throws NoSuchElementException, ParseException { 1.110 + 1.111 + if (this.currentToken == null) { 1.112 + throw new NoSuchElementException("Iteration already finished."); 1.113 + } 1.114 + 1.115 + final String result = this.currentToken; 1.116 + // updates currentToken, may trigger ParseException: 1.117 + this.searchPos = findNext(this.searchPos); 1.118 + 1.119 + return result; 1.120 + } 1.121 + 1.122 + 1.123 + /** 1.124 + * Returns the next token. 1.125 + * Same as {@link #nextToken}, but with generic return type. 1.126 + * 1.127 + * @return the next token in this iteration 1.128 + * 1.129 + * @throws NoSuchElementException if there are no more tokens 1.130 + * @throws ParseException if an invalid header value is encountered 1.131 + */ 1.132 + public final Object next() 1.133 + throws NoSuchElementException, ParseException { 1.134 + return nextToken(); 1.135 + } 1.136 + 1.137 + 1.138 + /** 1.139 + * Removing tokens is not supported. 1.140 + * 1.141 + * @throws UnsupportedOperationException always 1.142 + */ 1.143 + public final void remove() 1.144 + throws UnsupportedOperationException { 1.145 + 1.146 + throw new UnsupportedOperationException 1.147 + ("Removing tokens is not supported."); 1.148 + } 1.149 + 1.150 + 1.151 + /** 1.152 + * Determines the next token. 1.153 + * If found, the token is stored in {@link #currentToken}. 1.154 + * The return value indicates the position after the token 1.155 + * in {@link #currentHeader}. If necessary, the next header 1.156 + * will be obtained from {@link #headerIt}. 1.157 + * If not found, {@link #currentToken} is set to <code>null</code>. 1.158 + * 1.159 + * @param from the position in the current header at which to 1.160 + * start the search, -1 to search in the first header 1.161 + * 1.162 + * @return the position after the found token in the current header, or 1.163 + * negative if there was no next token 1.164 + * 1.165 + * @throws ParseException if an invalid header value is encountered 1.166 + */ 1.167 + protected int findNext(int from) 1.168 + throws ParseException { 1.169 + 1.170 + if (from < 0) { 1.171 + // called from the constructor, initialize the first header 1.172 + if (!this.headerIt.hasNext()) { 1.173 + return -1; 1.174 + } 1.175 + this.currentHeader = this.headerIt.nextHeader().getValue(); 1.176 + from = 0; 1.177 + } else { 1.178 + // called after a token, make sure there is a separator 1.179 + from = findTokenSeparator(from); 1.180 + } 1.181 + 1.182 + int start = findTokenStart(from); 1.183 + if (start < 0) { 1.184 + this.currentToken = null; 1.185 + return -1; // nothing found 1.186 + } 1.187 + 1.188 + int end = findTokenEnd(start); 1.189 + this.currentToken = createToken(this.currentHeader, start, end); 1.190 + return end; 1.191 + } 1.192 + 1.193 + 1.194 + /** 1.195 + * Creates a new token to be returned. 1.196 + * Called from {@link #findNext findNext} after the token is identified. 1.197 + * The default implementation simply calls 1.198 + * {@link java.lang.String#substring String.substring}. 1.199 + * <br/> 1.200 + * If header values are significantly longer than tokens, and some 1.201 + * tokens are permanently referenced by the application, there can 1.202 + * be problems with garbage collection. A substring will hold a 1.203 + * reference to the full characters of the original string and 1.204 + * therefore occupies more memory than might be expected. 1.205 + * To avoid this, override this method and create a new string 1.206 + * instead of a substring. 1.207 + * 1.208 + * @param value the full header value from which to create a token 1.209 + * @param start the index of the first token character 1.210 + * @param end the index after the last token character 1.211 + * 1.212 + * @return a string representing the token identified by the arguments 1.213 + */ 1.214 + protected String createToken(String value, int start, int end) { 1.215 + return value.substring(start, end); 1.216 + } 1.217 + 1.218 + 1.219 + /** 1.220 + * Determines the starting position of the next token. 1.221 + * This method will iterate over headers if necessary. 1.222 + * 1.223 + * @param from the position in the current header at which to 1.224 + * start the search 1.225 + * 1.226 + * @return the position of the token start in the current header, 1.227 + * negative if no token start could be found 1.228 + */ 1.229 + protected int findTokenStart(int from) { 1.230 + if (from < 0) { 1.231 + throw new IllegalArgumentException 1.232 + ("Search position must not be negative: " + from); 1.233 + } 1.234 + 1.235 + boolean found = false; 1.236 + while (!found && (this.currentHeader != null)) { 1.237 + 1.238 + final int to = this.currentHeader.length(); 1.239 + while (!found && (from < to)) { 1.240 + 1.241 + final char ch = this.currentHeader.charAt(from); 1.242 + if (isTokenSeparator(ch) || isWhitespace(ch)) { 1.243 + // whitspace and token separators are skipped 1.244 + from++; 1.245 + } else if (isTokenChar(this.currentHeader.charAt(from))) { 1.246 + // found the start of a token 1.247 + found = true; 1.248 + } else { 1.249 + throw new ParseException 1.250 + ("Invalid character before token (pos " + from + 1.251 + "): " + this.currentHeader); 1.252 + } 1.253 + } 1.254 + if (!found) { 1.255 + if (this.headerIt.hasNext()) { 1.256 + this.currentHeader = this.headerIt.nextHeader().getValue(); 1.257 + from = 0; 1.258 + } else { 1.259 + this.currentHeader = null; 1.260 + } 1.261 + } 1.262 + } // while headers 1.263 + 1.264 + return found ? from : -1; 1.265 + } 1.266 + 1.267 + 1.268 + /** 1.269 + * Determines the position of the next token separator. 1.270 + * Because of multi-header joining rules, the end of a 1.271 + * header value is a token separator. This method does 1.272 + * therefore not need to iterate over headers. 1.273 + * 1.274 + * @param from the position in the current header at which to 1.275 + * start the search 1.276 + * 1.277 + * @return the position of a token separator in the current header, 1.278 + * or at the end 1.279 + * 1.280 + * @throws ParseException 1.281 + * if a new token is found before a token separator. 1.282 + * RFC 2616, section 2.1 explicitly requires a comma between 1.283 + * tokens for <tt>#</tt>. 1.284 + */ 1.285 + protected int findTokenSeparator(int from) { 1.286 + if (from < 0) { 1.287 + throw new IllegalArgumentException 1.288 + ("Search position must not be negative: " + from); 1.289 + } 1.290 + 1.291 + boolean found = false; 1.292 + final int to = this.currentHeader.length(); 1.293 + while (!found && (from < to)) { 1.294 + final char ch = this.currentHeader.charAt(from); 1.295 + if (isTokenSeparator(ch)) { 1.296 + found = true; 1.297 + } else if (isWhitespace(ch)) { 1.298 + from++; 1.299 + } else if (isTokenChar(ch)) { 1.300 + throw new ParseException 1.301 + ("Tokens without separator (pos " + from + 1.302 + "): " + this.currentHeader); 1.303 + } else { 1.304 + throw new ParseException 1.305 + ("Invalid character after token (pos " + from + 1.306 + "): " + this.currentHeader); 1.307 + } 1.308 + } 1.309 + 1.310 + return from; 1.311 + } 1.312 + 1.313 + 1.314 + /** 1.315 + * Determines the ending position of the current token. 1.316 + * This method will not leave the current header value, 1.317 + * since the end of the header value is a token boundary. 1.318 + * 1.319 + * @param from the position of the first character of the token 1.320 + * 1.321 + * @return the position after the last character of the token. 1.322 + * The behavior is undefined if <code>from</code> does not 1.323 + * point to a token character in the current header value. 1.324 + */ 1.325 + protected int findTokenEnd(int from) { 1.326 + if (from < 0) { 1.327 + throw new IllegalArgumentException 1.328 + ("Token start position must not be negative: " + from); 1.329 + } 1.330 + 1.331 + final int to = this.currentHeader.length(); 1.332 + int end = from+1; 1.333 + while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) { 1.334 + end++; 1.335 + } 1.336 + 1.337 + return end; 1.338 + } 1.339 + 1.340 + 1.341 + /** 1.342 + * Checks whether a character is a token separator. 1.343 + * RFC 2616, section 2.1 defines comma as the separator for 1.344 + * <tt>#token</tt> sequences. The end of a header value will 1.345 + * also separate tokens, but that is not a character check. 1.346 + * 1.347 + * @param ch the character to check 1.348 + * 1.349 + * @return <code>true</code> if the character is a token separator, 1.350 + * <code>false</code> otherwise 1.351 + */ 1.352 + protected boolean isTokenSeparator(char ch) { 1.353 + return (ch == ','); 1.354 + } 1.355 + 1.356 + 1.357 + /** 1.358 + * Checks whether a character is a whitespace character. 1.359 + * RFC 2616, section 2.2 defines space and horizontal tab as whitespace. 1.360 + * The optional preceeding line break is irrelevant, since header 1.361 + * continuation is handled transparently when parsing messages. 1.362 + * 1.363 + * @param ch the character to check 1.364 + * 1.365 + * @return <code>true</code> if the character is whitespace, 1.366 + * <code>false</code> otherwise 1.367 + */ 1.368 + protected boolean isWhitespace(char ch) { 1.369 + 1.370 + // we do not use Character.isWhitspace(ch) here, since that allows 1.371 + // many control characters which are not whitespace as per RFC 2616 1.372 + return ((ch == '\t') || Character.isSpaceChar(ch)); 1.373 + } 1.374 + 1.375 + 1.376 + /** 1.377 + * Checks whether a character is a valid token character. 1.378 + * Whitespace, control characters, and HTTP separators are not 1.379 + * valid token characters. The HTTP specification (RFC 2616, section 2.2) 1.380 + * defines tokens only for the US-ASCII character set, this 1.381 + * method extends the definition to other character sets. 1.382 + * 1.383 + * @param ch the character to check 1.384 + * 1.385 + * @return <code>true</code> if the character is a valid token start, 1.386 + * <code>false</code> otherwise 1.387 + */ 1.388 + protected boolean isTokenChar(char ch) { 1.389 + 1.390 + // common sense extension of ALPHA + DIGIT 1.391 + if (Character.isLetterOrDigit(ch)) 1.392 + return true; 1.393 + 1.394 + // common sense extension of CTL 1.395 + if (Character.isISOControl(ch)) 1.396 + return false; 1.397 + 1.398 + // no common sense extension for this 1.399 + if (isHttpSeparator(ch)) 1.400 + return false; 1.401 + 1.402 + // RFC 2616, section 2.2 defines a token character as 1.403 + // "any CHAR except CTLs or separators". The controls 1.404 + // and separators are included in the checks above. 1.405 + // This will yield unexpected results for Unicode format characters. 1.406 + // If that is a problem, overwrite isHttpSeparator(char) to filter 1.407 + // out the false positives. 1.408 + return true; 1.409 + } 1.410 + 1.411 + 1.412 + /** 1.413 + * Checks whether a character is an HTTP separator. 1.414 + * The implementation in this class checks only for the HTTP separators 1.415 + * defined in RFC 2616, section 2.2. If you need to detect other 1.416 + * separators beyond the US-ASCII character set, override this method. 1.417 + * 1.418 + * @param ch the character to check 1.419 + * 1.420 + * @return <code>true</code> if the character is an HTTP separator 1.421 + */ 1.422 + protected boolean isHttpSeparator(char ch) { 1.423 + return (HTTP_SEPARATORS.indexOf(ch) >= 0); 1.424 + } 1.425 + 1.426 + 1.427 +} // class BasicTokenIterator 1.428 +