/**
 * Copyright (c) 2012, Ben Fortuna
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  o Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 *  o Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 *  o Neither the name of Ben Fortuna nor the names of any other contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package net.fortuna.ical4j.util;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Pattern;

/**
 * $Id$
 *
 * Created on 11/09/2005
 *
 * Utility methods for working with URIs.
 * @author Ben Fortuna
 */
public final class Uris {

    /**
     * URI scheme used when relaxed parsing is enabled and the given input leads to an invalid URI.
     */
    public static final String INVALID_SCHEME = "net.fortunal.ical4j.invalid";

    /** Matches any string that starts with {@code cid:}, ignoring case. */
    private static final Pattern CID_PATTERN = Pattern.compile("(?i)^cid:.*");

    /** Matches the angle brackets that are removed from cid URIs in Notes compatibility mode. */
    private static final Pattern NOTES_CID_REPLACEMENT_PATTERN = Pattern.compile("[<>]");

    /**
     * Constructor made private to enforce static nature.
     */
    private Uris() {
    }

    /**
     * Prepares the specified URI string for parsing. Despite its name this method performs no
     * percent-encoding: the string is returned unmodified unless the
     * {@link CompatibilityHints#KEY_NOTES_COMPATIBILITY} hint is enabled and the string matches the
     * {@code cid:} pattern, in which case every {@code <} and {@code >} character is removed.
     * @param s a URI string
     * @return the URI string, with angle brackets stripped when the condition above holds
     */
    public static String encode(final String s) {
        // NOTE: generic URLEncoder-based encoding used to live here but is disabled; only the
        // Lotus Notes workaround below remains active.

        /*
         * Lotus Notes does not correctly strip angle brackets from cid uris. From RFC2392: A "cid" URL is converted to
         * the corresponding Content-ID message header [MIME] by removing the "cid:" prefix, converting the % encoded
         * character to their equivalent US-ASCII characters, and enclosing the remaining parts with an angle bracket
         * pair, "<" and ">". For example, "cid:foo4%25foo1@bar.net" corresponds to Content-ID: <foo4%25foo1@bar.net>
         * Reversing the process and converting URL special characters to their % encodings produces the original cid. A
         * "mid" URL is converted to a Message-ID or Message-ID/Content-ID pair in a similar fashion.
         */
        if (CompatibilityHints.isHintEnabled(CompatibilityHints.KEY_NOTES_COMPATIBILITY)
                && CID_PATTERN.matcher(s).matches()) {

            return NOTES_CID_REPLACEMENT_PATTERN.matcher(s).replaceAll("");
        }
        return s;
    }

    /**
     * Counterpart of {@link #encode(String)}. No decoding is currently performed (the former
     * URLDecoder-based implementation is disabled), so the specified URI string is always returned
     * unmodified.
     * @param s a URI string
     * @return the same URI string that was passed in
     */
    public static String decode(final String s) {
        return s;
    }

    /**
     * Attempts to create a URI instance and will optionally swallow any resulting URISyntaxException depending on
     * configured {@link CompatibilityHints}. The string is first passed through {@link #encode(String)} for
     * greater compatibility.
     * <p>
     * When relaxed parsing is enabled and if the string representation is not valid, a second URI creation attempt
     * is made by splitting the string at the first colon into a scheme and a scheme specific part and letting
     * {@link URI#URI(String, String, String)} quote the latter. For example,
     * "mailto:joe smith@example.com" becomes "mailto:joe%20smith@example.com".
     * </p>
     * <p>
     * If the second attempt also leads to a {@code URISyntaxException}, an opaque URI is constructed with a scheme
     * of {@code Uris.INVALID_SCHEME} and a value corresponding to the initial representation.
     * </p>
     *
     * @param s a string representation of a URI.
     * @return a URI instance, which may not correspond to the URI string if a valid
     * URI string is not specified and relaxed parsing is enabled.
     * @throws URISyntaxException if a valid URI string is not specified and relaxed parsing is disabled
     * @throws IllegalArgumentException if relaxed parsing is enabled and even the opaque fallback URI cannot be
     * built; the underlying {@code URISyntaxException} is attached as the cause
     */
    public static URI create(final String s) throws URISyntaxException {
        final String encoded = encode(s);
        try {
            return new URI(encoded);
        } catch (URISyntaxException use) {
            if (!CompatibilityHints.isHintEnabled(CompatibilityHints.KEY_RELAXED_PARSING)) {
                throw use;
            }

            // Second attempt: split at the first colon and let the multi-argument constructor
            // quote illegal characters in the scheme specific part. Skipped when there is no
            // colon or nothing follows it.
            final int index = encoded.indexOf(':');
            if (index != -1 && index < encoded.length() - 1) {
                try {
                    return new URI(encoded.substring(0, index), encoded.substring(index + 1), null);
                } catch (URISyntaxException ignored) {
                    // Intentionally ignored: fall through to the opaque INVALID_SCHEME URI below.
                }
            }

            // Last resort: wrap the raw (un-encoded) input in an opaque URI.
            try {
                return new URI(INVALID_SCHEME, s, null);
            } catch (URISyntaxException fallbackFailure) {
                // should not happen as we are building an opaque URI
                throw new IllegalArgumentException("Could not build URI from " + s, fallbackFailure);
            }
        }
    }
}