Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
1 /*
2 * ====================================================================
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 * ====================================================================
20 *
21 * This software consists of voluntary contributions made by many
22 * individuals on behalf of the Apache Software Foundation. For more
23 * information on the Apache Software Foundation, please see
24 * <http://www.apache.org/>.
25 *
26 */
28 package ch.boye.httpclientandroidlib.message;
30 import java.util.NoSuchElementException;
32 import ch.boye.httpclientandroidlib.HeaderIterator;
33 import ch.boye.httpclientandroidlib.ParseException;
34 import ch.boye.httpclientandroidlib.TokenIterator;
36 /**
37 * Basic implementation of a {@link TokenIterator}.
38 * This implementation parses <tt>#token<tt> sequences as
39 * defined by RFC 2616, section 2.
40 * It extends that definition somewhat beyond US-ASCII.
41 *
42 * @since 4.0
43 */
44 public class BasicTokenIterator implements TokenIterator {
46 /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */
47 // the order of the characters here is adjusted to put the
48 // most likely candidates at the beginning of the collection
49 public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
52 /** The iterator from which to obtain the next header. */
53 protected final HeaderIterator headerIt;
55 /**
56 * The value of the current header.
57 * This is the header value that includes {@link #currentToken}.
58 * Undefined if the iteration is over.
59 */
60 protected String currentHeader;
62 /**
63 * The token to be returned by the next call to {@link #currentToken}.
64 * <code>null</code> if the iteration is over.
65 */
66 protected String currentToken;
68 /**
69 * The position after {@link #currentToken} in {@link #currentHeader}.
70 * Undefined if the iteration is over.
71 */
72 protected int searchPos;
75 /**
76 * Creates a new instance of {@link BasicTokenIterator}.
77 *
78 * @param headerIterator the iterator for the headers to tokenize
79 */
80 public BasicTokenIterator(final HeaderIterator headerIterator) {
81 if (headerIterator == null) {
82 throw new IllegalArgumentException
83 ("Header iterator must not be null.");
84 }
86 this.headerIt = headerIterator;
87 this.searchPos = findNext(-1);
88 }
91 // non-javadoc, see interface TokenIterator
92 public boolean hasNext() {
93 return (this.currentToken != null);
94 }
97 /**
98 * Obtains the next token from this iteration.
99 *
100 * @return the next token in this iteration
101 *
102 * @throws NoSuchElementException if the iteration is already over
103 * @throws ParseException if an invalid header value is encountered
104 */
105 public String nextToken()
106 throws NoSuchElementException, ParseException {
108 if (this.currentToken == null) {
109 throw new NoSuchElementException("Iteration already finished.");
110 }
112 final String result = this.currentToken;
113 // updates currentToken, may trigger ParseException:
114 this.searchPos = findNext(this.searchPos);
116 return result;
117 }
120 /**
121 * Returns the next token.
122 * Same as {@link #nextToken}, but with generic return type.
123 *
124 * @return the next token in this iteration
125 *
126 * @throws NoSuchElementException if there are no more tokens
127 * @throws ParseException if an invalid header value is encountered
128 */
129 public final Object next()
130 throws NoSuchElementException, ParseException {
131 return nextToken();
132 }
135 /**
136 * Removing tokens is not supported.
137 *
138 * @throws UnsupportedOperationException always
139 */
140 public final void remove()
141 throws UnsupportedOperationException {
143 throw new UnsupportedOperationException
144 ("Removing tokens is not supported.");
145 }
148 /**
149 * Determines the next token.
150 * If found, the token is stored in {@link #currentToken}.
151 * The return value indicates the position after the token
152 * in {@link #currentHeader}. If necessary, the next header
153 * will be obtained from {@link #headerIt}.
154 * If not found, {@link #currentToken} is set to <code>null</code>.
155 *
156 * @param from the position in the current header at which to
157 * start the search, -1 to search in the first header
158 *
159 * @return the position after the found token in the current header, or
160 * negative if there was no next token
161 *
162 * @throws ParseException if an invalid header value is encountered
163 */
164 protected int findNext(int from)
165 throws ParseException {
167 if (from < 0) {
168 // called from the constructor, initialize the first header
169 if (!this.headerIt.hasNext()) {
170 return -1;
171 }
172 this.currentHeader = this.headerIt.nextHeader().getValue();
173 from = 0;
174 } else {
175 // called after a token, make sure there is a separator
176 from = findTokenSeparator(from);
177 }
179 int start = findTokenStart(from);
180 if (start < 0) {
181 this.currentToken = null;
182 return -1; // nothing found
183 }
185 int end = findTokenEnd(start);
186 this.currentToken = createToken(this.currentHeader, start, end);
187 return end;
188 }
191 /**
192 * Creates a new token to be returned.
193 * Called from {@link #findNext findNext} after the token is identified.
194 * The default implementation simply calls
195 * {@link java.lang.String#substring String.substring}.
196 * <br/>
197 * If header values are significantly longer than tokens, and some
198 * tokens are permanently referenced by the application, there can
199 * be problems with garbage collection. A substring will hold a
200 * reference to the full characters of the original string and
201 * therefore occupies more memory than might be expected.
202 * To avoid this, override this method and create a new string
203 * instead of a substring.
204 *
205 * @param value the full header value from which to create a token
206 * @param start the index of the first token character
207 * @param end the index after the last token character
208 *
209 * @return a string representing the token identified by the arguments
210 */
211 protected String createToken(String value, int start, int end) {
212 return value.substring(start, end);
213 }
216 /**
217 * Determines the starting position of the next token.
218 * This method will iterate over headers if necessary.
219 *
220 * @param from the position in the current header at which to
221 * start the search
222 *
223 * @return the position of the token start in the current header,
224 * negative if no token start could be found
225 */
226 protected int findTokenStart(int from) {
227 if (from < 0) {
228 throw new IllegalArgumentException
229 ("Search position must not be negative: " + from);
230 }
232 boolean found = false;
233 while (!found && (this.currentHeader != null)) {
235 final int to = this.currentHeader.length();
236 while (!found && (from < to)) {
238 final char ch = this.currentHeader.charAt(from);
239 if (isTokenSeparator(ch) || isWhitespace(ch)) {
240 // whitspace and token separators are skipped
241 from++;
242 } else if (isTokenChar(this.currentHeader.charAt(from))) {
243 // found the start of a token
244 found = true;
245 } else {
246 throw new ParseException
247 ("Invalid character before token (pos " + from +
248 "): " + this.currentHeader);
249 }
250 }
251 if (!found) {
252 if (this.headerIt.hasNext()) {
253 this.currentHeader = this.headerIt.nextHeader().getValue();
254 from = 0;
255 } else {
256 this.currentHeader = null;
257 }
258 }
259 } // while headers
261 return found ? from : -1;
262 }
265 /**
266 * Determines the position of the next token separator.
267 * Because of multi-header joining rules, the end of a
268 * header value is a token separator. This method does
269 * therefore not need to iterate over headers.
270 *
271 * @param from the position in the current header at which to
272 * start the search
273 *
274 * @return the position of a token separator in the current header,
275 * or at the end
276 *
277 * @throws ParseException
278 * if a new token is found before a token separator.
279 * RFC 2616, section 2.1 explicitly requires a comma between
280 * tokens for <tt>#</tt>.
281 */
282 protected int findTokenSeparator(int from) {
283 if (from < 0) {
284 throw new IllegalArgumentException
285 ("Search position must not be negative: " + from);
286 }
288 boolean found = false;
289 final int to = this.currentHeader.length();
290 while (!found && (from < to)) {
291 final char ch = this.currentHeader.charAt(from);
292 if (isTokenSeparator(ch)) {
293 found = true;
294 } else if (isWhitespace(ch)) {
295 from++;
296 } else if (isTokenChar(ch)) {
297 throw new ParseException
298 ("Tokens without separator (pos " + from +
299 "): " + this.currentHeader);
300 } else {
301 throw new ParseException
302 ("Invalid character after token (pos " + from +
303 "): " + this.currentHeader);
304 }
305 }
307 return from;
308 }
311 /**
312 * Determines the ending position of the current token.
313 * This method will not leave the current header value,
314 * since the end of the header value is a token boundary.
315 *
316 * @param from the position of the first character of the token
317 *
318 * @return the position after the last character of the token.
319 * The behavior is undefined if <code>from</code> does not
320 * point to a token character in the current header value.
321 */
322 protected int findTokenEnd(int from) {
323 if (from < 0) {
324 throw new IllegalArgumentException
325 ("Token start position must not be negative: " + from);
326 }
328 final int to = this.currentHeader.length();
329 int end = from+1;
330 while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {
331 end++;
332 }
334 return end;
335 }
338 /**
339 * Checks whether a character is a token separator.
340 * RFC 2616, section 2.1 defines comma as the separator for
341 * <tt>#token</tt> sequences. The end of a header value will
342 * also separate tokens, but that is not a character check.
343 *
344 * @param ch the character to check
345 *
346 * @return <code>true</code> if the character is a token separator,
347 * <code>false</code> otherwise
348 */
349 protected boolean isTokenSeparator(char ch) {
350 return (ch == ',');
351 }
354 /**
355 * Checks whether a character is a whitespace character.
356 * RFC 2616, section 2.2 defines space and horizontal tab as whitespace.
357 * The optional preceeding line break is irrelevant, since header
358 * continuation is handled transparently when parsing messages.
359 *
360 * @param ch the character to check
361 *
362 * @return <code>true</code> if the character is whitespace,
363 * <code>false</code> otherwise
364 */
365 protected boolean isWhitespace(char ch) {
367 // we do not use Character.isWhitspace(ch) here, since that allows
368 // many control characters which are not whitespace as per RFC 2616
369 return ((ch == '\t') || Character.isSpaceChar(ch));
370 }
373 /**
374 * Checks whether a character is a valid token character.
375 * Whitespace, control characters, and HTTP separators are not
376 * valid token characters. The HTTP specification (RFC 2616, section 2.2)
377 * defines tokens only for the US-ASCII character set, this
378 * method extends the definition to other character sets.
379 *
380 * @param ch the character to check
381 *
382 * @return <code>true</code> if the character is a valid token start,
383 * <code>false</code> otherwise
384 */
385 protected boolean isTokenChar(char ch) {
387 // common sense extension of ALPHA + DIGIT
388 if (Character.isLetterOrDigit(ch))
389 return true;
391 // common sense extension of CTL
392 if (Character.isISOControl(ch))
393 return false;
395 // no common sense extension for this
396 if (isHttpSeparator(ch))
397 return false;
399 // RFC 2616, section 2.2 defines a token character as
400 // "any CHAR except CTLs or separators". The controls
401 // and separators are included in the checks above.
402 // This will yield unexpected results for Unicode format characters.
403 // If that is a problem, overwrite isHttpSeparator(char) to filter
404 // out the false positives.
405 return true;
406 }
409 /**
410 * Checks whether a character is an HTTP separator.
411 * The implementation in this class checks only for the HTTP separators
412 * defined in RFC 2616, section 2.2. If you need to detect other
413 * separators beyond the US-ASCII character set, override this method.
414 *
415 * @param ch the character to check
416 *
417 * @return <code>true</code> if the character is an HTTP separator
418 */
419 protected boolean isHttpSeparator(char ch) {
420 return (HTTP_SEPARATORS.indexOf(ch) >= 0);
421 }
424 } // class BasicTokenIterator