diff -r 000000000000 -r fb9019fb1bf7 src/net/fortuna/ical4j/util/Uris.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/net/fortuna/ical4j/util/Uris.java Tue Feb 10 18:12:00 2015 +0100 @@ -0,0 +1,144 @@ +/** + * Copyright (c) 2012, Ben Fortuna + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * o Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * o Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * o Neither the name of Ben Fortuna nor the names of any other contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package net.fortuna.ical4j.util; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.regex.Pattern; + +/** + * $Id$ + * + * Created on 11/09/2005 + * + * Utility methods for working with URIs. + * @author Ben Fortuna + */ +public final class Uris { + + /** + * URI Scheme used when relaxed parsing is enabled and the given input stream lead to an invalid URI. + */ + public static final String INVALID_SCHEME = "net.fortunal.ical4j.invalid"; + + private static final Pattern CID_PATTERN = Pattern.compile("(?i)^cid:.*"); + private static final Pattern NOTES_CID_REPLACEMENT_PATTERN = Pattern.compile("[<>]"); + + /** + * Constructor made private to enforce static nature. + */ + private Uris() { + } + + /** + * Encodes the specified URI string using the UTF-8 charset. In the event that an exception is thrown, the specifed + * URI string is returned unmodified. + * @param s a URI string + * @return an encoded URI string + */ + public static String encode(final String s) { + /* + * try { return URLEncoder.encode(s, ENCODING_CHARSET); } catch (UnsupportedEncodingException use) { + * log.error("Error ocurred encoding URI [" + s + "]", use); } + */ + + /* + * Lotus Notes does not correctly strip angle brackets from cid uris. From RFC2392: A "cid" URL is converted to + * the corresponding Content-ID message header [MIME] by removing the "cid:" prefix, converting the % encoded + * character to their equivalent US-ASCII characters, and enclosing the remaining parts with an angle bracket + * pair, "<" and ">". For example, "cid:foo4%25foo1@bar.net" corresponds to Content-ID: + * Reversing the process and converting URL special characters to their % encodings produces the original cid. A + * "mid" URL is converted to a Message-ID or Message-ID/Content-ID pair in a similar fashion. + */ + if (CompatibilityHints.isHintEnabled(CompatibilityHints.KEY_NOTES_COMPATIBILITY) + && CID_PATTERN.matcher(s).matches()) { + + return NOTES_CID_REPLACEMENT_PATTERN.matcher(s).replaceAll(""); + } + return s; + } + + /** + * Decodes the specified URI string using the UTF-8 charset. In the event that an exception is thrown, the specifed + * URI string is returned unmodified. + * @param s a URI string + * @return an encoded URI string + */ + public static String decode(final String s) { + /* + * try { return URLDecoder.decode(s, ENCODING_CHARSET); } catch (UnsupportedEncodingException use) { + * log.error("Error ocurred decoding URI [" + s + "]", use); } + */ + return s; + } + + /** + * Attempts to create a URI instance and will optionally swallow any resulting URISyntaxException depending on + * configured {@link CompatibilityHints}. Will also automatically attempt encoding of the string representation for + * greater compatibility. + *

When relaxed parsing is enabled and if the string representation is not valid, a second URI creation attempt is made + * by extracting the scheme from the scheme specific part and URI encoding that later part. For example, + * "mailto: joe smith@example.com" becomes "mailto:joe%20smith@example.com".

+ *

If the second attempts also leads to a {@code URISyntaxException}, an opaque URI is constructed with a scheme + * of {@code Uris.INVALID_SCHEME} and a value corresponding to the initial representation.

+ * + * @param s a string representation of a URI. + * @return a URI instance, which may not correspond to the URI string if a valid + * URI string is not specified and relaxed parsing is enabled. + * @throws URISyntaxException if a valid URI string is not specified and relaxed parsing is disabled + */ + public static URI create(final String s) throws URISyntaxException { + try { + return new URI(encode(s)); + } + catch (URISyntaxException use) { + if (CompatibilityHints + .isHintEnabled(CompatibilityHints.KEY_RELAXED_PARSING)) { + String encoded = encode(s); + int index = encoded.indexOf(':'); + if (index != -1 && index < encoded.length() -1) { + try { + return new URI(encoded.substring(0, index), encoded.substring(index + 1), null); + } catch (URISyntaxException use2) { + } + } + try { + return new URI(INVALID_SCHEME, s, null); + } catch (URISyntaxException use2) { + // should not happen as we are building an opaque URI + throw new IllegalArgumentException("Could not build URI from " + s); + } + } + throw use; + } + } +}