/**
 * Copyright (c) 2012, Ben Fortuna
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  o Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 *  o Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 *  o Neither the name of Ben Fortuna nor the names of any other contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package net.fortuna.ical4j.data;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.net.URISyntaxException;
import java.text.MessageFormat;
import java.text.ParseException;

import net.fortuna.ical4j.model.Calendar;
import net.fortuna.ical4j.model.Component;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * <pre>
 * $Id$
 *
 *  Created [Nov 5, 2004]
 * </pre>
 *
 * The default implementation of a calendar parser.
 *
 * <p>The parser drives a {@link StreamTokenizer} over the supplied character stream and reports
 * what it reads (calendar, components, properties, parameters) to a {@link ContentHandler}.</p>
 *
 * @author Ben Fortuna
 */
public class CalendarParserImpl implements CalendarParser {

    private static final int WORD_CHAR_START = 32;

    private static final int WORD_CHAR_END = 255;

    private static final int WHITESPACE_CHAR_START = 0;

    // NOTE(review): this is decimal 20, so characters 21-31 are left as ordinary (single-char)
    // tokens after resetSyntax(). Possibly 0x20 was intended - left unchanged to preserve the
    // existing tokenisation; confirm before altering.
    private static final int WHITESPACE_CHAR_END = 20;

    private static final String UNEXPECTED_TOKEN_MESSAGE = "Expected [{0}], read [{1}]";

    /**
     * Charset used to decode byte streams. RFC 5545 (and RFC 2445 before it) defines UTF-8 as the
     * default charset of an iCalendar stream, so the platform default must not be relied upon.
     */
    private static final String DEFAULT_CHARSET = "UTF-8";

    private final Log log = LogFactory.getLog(CalendarParserImpl.class);

    private final ComponentListParser componentListParser = new ComponentListParser();

    private final ComponentParser componentParser = new ComponentParser();

    private final PropertyListParser propertyListParser = new PropertyListParser();

    private final PropertyParser propertyParser = new PropertyParser();

    private final ParameterListParser paramListParser = new ParameterListParser();

    private final ParameterParser paramParser = new ParameterParser();

    /**
     * {@inheritDoc}
     *
     * <p>The stream is decoded as UTF-8. Callers that need a different charset should construct
     * their own {@link Reader} and use {@link #parse(Reader, ContentHandler)}.</p>
     */
    public final void parse(final InputStream in, final ContentHandler handler)
            throws IOException, ParserException {
        // The (InputStream, String) constructor declares UnsupportedEncodingException, which is
        // an IOException and therefore already covered by this method's throws clause.
        parse(new InputStreamReader(in, DEFAULT_CHARSET), handler);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Any exception other than {@link IOException} or {@link ParserException} raised while
     * parsing (including exceptions raised by the handler) is wrapped in a
     * {@link ParserException} carrying the current line number and the original cause.</p>
     */
    public final void parse(final Reader in, final ContentHandler handler)
            throws IOException, ParserException {

        final StreamTokenizer tokeniser = new StreamTokenizer(in);
        try {
            // the order of the following calls matters: later calls override earlier ones
            // for overlapping character ranges.
            tokeniser.resetSyntax();
            tokeniser.wordChars(WORD_CHAR_START, WORD_CHAR_END);
            tokeniser.whitespaceChars(WHITESPACE_CHAR_START, WHITESPACE_CHAR_END);
            tokeniser.ordinaryChar(':');
            tokeniser.ordinaryChar(';');
            tokeniser.ordinaryChar('=');
            tokeniser.ordinaryChar('\t');
            tokeniser.eolIsSignificant(true);
            tokeniser.whitespaceChars(0, 0);
            tokeniser.quoteChar('"');

            // BEGIN:VCALENDAR
            assertToken(tokeniser, in, Calendar.BEGIN);

            assertToken(tokeniser, in, ':');

            assertToken(tokeniser, in, Calendar.VCALENDAR, true);

            assertToken(tokeniser, in, StreamTokenizer.TT_EOL);

            handler.startCalendar();

            // parse calendar properties..
            propertyListParser.parse(tokeniser, in, handler);

            // parse components..
            componentListParser.parse(tokeniser, in, handler);

            // END:VCALENDAR
            // The "END" word itself has already been consumed: the property list loop above
            // only terminates once the current token equals Component.END.

            assertToken(tokeniser, in, ':');

            assertToken(tokeniser, in, Calendar.VCALENDAR, true);

            handler.endCalendar();
        }
        catch (ParserException e) {
            // already carries parse context - propagate untouched.
            throw e;
        }
        catch (IOException e) {
            throw e;
        }
        catch (Exception e) {
            // ParseException, URISyntaxException and any runtime failure are reported
            // uniformly, with the line number and the original cause preserved.
            throw new ParserException(e.getMessage(), getLineNumber(tokeniser, in), e);
        }
    }

    /**
     * Parses an iCalendar property list from a stream tokeniser.
     */
    private class PropertyListParser {

        /**
         * Reads properties (and nested components) until the current token is
         * {@link Component#END}.
         * @param tokeniser stream tokeniser to read from
         * @param in the reader underlying the tokeniser (used for line number reporting)
         * @param handler receiver of parse events
         * @throws IOException when unable to read from stream
         * @throws ParseException propagated from nested parsing
         * @throws URISyntaxException propagated from nested parsing
         * @throws ParserException when an unexpected token or end of file is read
         */
        public void parse(final StreamTokenizer tokeniser, Reader in,
                final ContentHandler handler) throws IOException, ParseException,
                URISyntaxException, ParserException {

            assertToken(tokeniser, in, StreamTokenizer.TT_WORD);

            while (!Component.END.equals(tokeniser.sval)) {
                // check for timezones observances or vevent/vtodo alarms..
                if (Component.BEGIN.equals(tokeniser.sval)) {
                    componentParser.parse(tokeniser, in, handler);
                }
                else {
                    propertyParser.parse(tokeniser, in, handler);
                }
                // skips blank lines and leaves the first token of the next line current.
                absorbWhitespace(tokeniser, in);
            }
        }
    }

    /**
     * Parses an iCalendar property from a stream tokeniser.
     */
    private class PropertyParser {

        private static final String PARSE_DEBUG_MESSAGE = "Property [{0}]";

        /**
         * Parses a single property; the property name is taken from the tokeniser's current
         * token.
         * @param tokeniser stream tokeniser to read from
         * @param in the reader underlying the tokeniser (used for line number reporting)
         * @param handler receiver of parse events
         * @throws IOException when unable to read from stream
         * @throws ParserException when an unexpected token or end of file is read
         * @throws URISyntaxException propagated from the handler / parameter parsing
         * @throws ParseException when the handler rejects the property value; the message is
         * prefixed with the property name
         */
        private void parse(final StreamTokenizer tokeniser, Reader in,
                final ContentHandler handler) throws IOException, ParserException,
                URISyntaxException, ParseException {

            final String name = tokeniser.sval;

            // debugging..
            if (log.isDebugEnabled()) {
                log.debug(MessageFormat.format(PARSE_DEBUG_MESSAGE, new Object[] {name}));
            }

            handler.startProperty(name);

            paramListParser.parse(tokeniser, in, handler);

            // it appears that control tokens (ie. ':') are allowed
            // after the first instance on a line is used.. as such
            // we must continue appending to value until EOL is
            // reached..
            final StringBuffer value = new StringBuffer();

            // DQUOTE is ordinary char for property value
            // From sec 4.3.11 of rfc-2445:
            // text = *(TSAFE-CHAR / ":" / DQUOTE / ESCAPED-CHAR)
            //
            tokeniser.ordinaryChar('"');
            try {
                int token = nextToken(tokeniser, in);

                while (token != StreamTokenizer.TT_EOL) {

                    if (tokeniser.ttype == StreamTokenizer.TT_WORD) {
                        value.append(tokeniser.sval);
                    }
                    else {
                        value.append((char) tokeniser.ttype);
                    }

                    token = nextToken(tokeniser, in);
                }
            }
            finally {
                // reset DQUOTE to be quote char, even when reading the value failed, so the
                // tokeniser is never left in the property-value configuration.
                tokeniser.quoteChar('"');
            }

            try {
                handler.propertyValue(value.toString());
            }
            catch (ParseException e) {
                final ParseException eNew = new ParseException("[" + name + "] "
                        + e.getMessage(), e.getErrorOffset());
                eNew.initCause(e);
                throw eNew;
            }

            handler.endProperty(name);
        }
    }

    /**
     * Parses a list of iCalendar parameters from a stream tokeniser.
     */
    private class ParameterListParser {

        /**
         * Parses one parameter for every ';' token read. The first token that is not ';' is
         * consumed and terminates the list.
         * @param tokeniser stream tokeniser to read from
         * @param in the reader underlying the tokeniser (used for line number reporting)
         * @param handler receiver of parse events
         * @throws IOException when unable to read from stream
         * @throws ParserException when an unexpected token or end of file is read
         * @throws URISyntaxException propagated from the handler
         */
        public void parse(final StreamTokenizer tokeniser, Reader in,
                final ContentHandler handler) throws IOException, ParserException,
                URISyntaxException {

            while (nextToken(tokeniser, in) == ';') {
                paramParser.parse(tokeniser, in, handler);
            }
        }
    }

    /**
     * Parses a single iCalendar parameter (name=value) from a stream tokeniser.
     */
    private class ParameterParser {

        /**
         * @param tokeniser stream tokeniser to read from
         * @param in the reader underlying the tokeniser (used for line number reporting)
         * @param handler receiver of parse events
         * @throws IOException when unable to read from stream
         * @throws ParserException when an unexpected token or end of file is read, or when the
         * handler raises a {@link ClassCastException}
         * @throws URISyntaxException propagated from the handler
         */
        private void parse(final StreamTokenizer tokeniser, Reader in,
                final ContentHandler handler) throws IOException, ParserException,
                URISyntaxException {

            assertToken(tokeniser, in, StreamTokenizer.TT_WORD);

            final String paramName = tokeniser.sval;

            // debugging..
            if (log.isDebugEnabled()) {
                log.debug("Parameter [" + paramName + "]");
            }

            assertToken(tokeniser, in, '=');

            final StringBuffer paramValue = new StringBuffer();

            // preserve quote chars..
            if (nextToken(tokeniser, in) == '"') {
                paramValue.append('"');
                paramValue.append(tokeniser.sval);
                paramValue.append('"');
            }
            else if (tokeniser.sval != null) {
                paramValue.append(tokeniser.sval);
                // check for additional words to account for equals (=) in param-value
                int token = nextToken(tokeniser, in);

                while (token != ';' && token != ':' && token != ',') {

                    if (tokeniser.ttype == StreamTokenizer.TT_WORD) {
                        paramValue.append(tokeniser.sval);
                    }
                    else {
                        paramValue.append((char) tokeniser.ttype);
                    }

                    token = nextToken(tokeniser, in);
                }
                // the delimiter belongs to the caller..
                tokeniser.pushBack();
            }
            else {
                // empty parameter value: the token just read is not part of it.
                tokeniser.pushBack();
            }

            try {
                handler.parameter(paramName, paramValue.toString());
            }
            catch (ClassCastException cce) {
                throw new ParserException("Error parsing parameter", getLineNumber(tokeniser, in), cce);
            }
        }
    }

    /**
     * Parses an iCalendar component list from a stream tokeniser.
     */
    private class ComponentListParser {

        /**
         * Parses components for as long as the current token is {@link Component#BEGIN}.
         * @param tokeniser stream tokeniser to read from
         * @param in the reader underlying the tokeniser (used for line number reporting)
         * @param handler receiver of parse events
         * @throws IOException when unable to read from stream
         * @throws ParseException propagated from nested parsing
         * @throws URISyntaxException propagated from nested parsing
         * @throws ParserException when an unexpected token or end of file is read
         */
        private void parse(final StreamTokenizer tokeniser, Reader in,
                final ContentHandler handler) throws IOException, ParseException,
                URISyntaxException, ParserException {

            while (Component.BEGIN.equals(tokeniser.sval)) {
                componentParser.parse(tokeniser, in, handler);
                absorbWhitespace(tokeniser, in);
            }
        }
    }

    /**
     * Parses an iCalendar component from a stream tokeniser.
     */
    private class ComponentParser {

        /**
         * Parses one component. On entry the "BEGIN" word has already been consumed; the method
         * reads ":NAME", the component's property list, and the matching ":NAME" that follows
         * the "END" word consumed by the property list parser.
         * @param tokeniser stream tokeniser to read from
         * @param in the reader underlying the tokeniser (used for line number reporting)
         * @param handler receiver of parse events
         * @throws IOException when unable to read from stream
         * @throws ParseException propagated from nested parsing
         * @throws URISyntaxException propagated from nested parsing
         * @throws ParserException when an unexpected token or end of file is read
         */
        private void parse(final StreamTokenizer tokeniser, Reader in,
                final ContentHandler handler) throws IOException, ParseException,
                URISyntaxException, ParserException {

            assertToken(tokeniser, in, ':');

            assertToken(tokeniser, in, StreamTokenizer.TT_WORD);

            final String name = tokeniser.sval;

            handler.startComponent(name);

            assertToken(tokeniser, in, StreamTokenizer.TT_EOL);

            // nested components (e.g. timezone observances, alarms) are handled by the
            // property list parser, which dispatches on BEGIN.
            propertyListParser.parse(tokeniser, in, handler);

            assertToken(tokeniser, in, ':');

            assertToken(tokeniser, in, name);

            assertToken(tokeniser, in, StreamTokenizer.TT_EOL);

            handler.endComponent(name);
        }
    }

    /**
     * Asserts that the next token in the stream matches the specified token.
     * @param tokeniser stream tokeniser to perform assertion on
     * @param in the reader underlying the tokeniser (used for line number reporting)
     * @param token expected token
     * @throws IOException when unable to read from stream
     * @throws ParserException when next token in the stream does not match the expected token
     */
    private void assertToken(final StreamTokenizer tokeniser, Reader in, final int token)
            throws IOException, ParserException {

        if (nextToken(tokeniser, in) != token) {
            throw new ParserException(MessageFormat.format(UNEXPECTED_TOKEN_MESSAGE, new Object[] {
                    new Integer(token), new Integer(tokeniser.ttype),
            }), getLineNumber(tokeniser, in));
        }

        if (log.isDebugEnabled()) {
            log.debug("[" + token + "]");
        }
    }

    /**
     * Asserts that the next token in the stream matches the specified token. This method is
     * case-sensitive.
     * @param tokeniser stream tokeniser to perform assertion on
     * @param in the reader underlying the tokeniser (used for line number reporting)
     * @param token expected token
     * @throws IOException when unable to read from stream
     * @throws ParserException when next token in the stream does not match the expected token
     */
    private void assertToken(final StreamTokenizer tokeniser, Reader in, final String token)
            throws IOException, ParserException {
        assertToken(tokeniser, in, token, false);
    }

    /**
     * Asserts that the next token in the stream is a word matching the specified token.
     * @param tokeniser stream tokeniser to perform assertion on
     * @param in the reader underlying the tokeniser (used for line number reporting)
     * @param token expected token
     * @param ignoreCase whether the comparison ignores case
     * @throws IOException when unable to read from stream
     * @throws ParserException when next token in the stream does not match the expected token
     */
    private void assertToken(final StreamTokenizer tokeniser, Reader in,
            final String token, final boolean ignoreCase) throws IOException,
            ParserException {

        // ensure next token is a word token..
        assertToken(tokeniser, in, StreamTokenizer.TT_WORD);

        if (ignoreCase) {
            if (!token.equalsIgnoreCase(tokeniser.sval)) {
                throw new ParserException(MessageFormat.format(UNEXPECTED_TOKEN_MESSAGE, new Object[] {
                        token, tokeniser.sval,
                }), getLineNumber(tokeniser, in));
            }
        }
        else if (!token.equals(tokeniser.sval)) {
            throw new ParserException(MessageFormat.format(UNEXPECTED_TOKEN_MESSAGE, new Object[] {
                    token, tokeniser.sval,
            }), getLineNumber(tokeniser, in));
        }

        if (log.isDebugEnabled()) {
            log.debug("[" + token + "]");
        }
    }

    /**
     * Absorbs extraneous newlines. On return the first token that is not an end-of-line has
     * been read and is the tokeniser's current token.
     * @param tokeniser stream tokeniser to read from
     * @param in the reader underlying the tokeniser (used for line number reporting)
     * @throws IOException when unable to read from stream
     * @throws ParserException when end of file is read
     */
    private void absorbWhitespace(final StreamTokenizer tokeniser, Reader in) throws IOException, ParserException {
        // HACK: absorb extraneous whitespace between components (KOrganizer)..
        while (nextToken(tokeniser, in) == StreamTokenizer.TT_EOL) {
            if (log.isTraceEnabled()) {
                log.trace("Absorbing extra whitespace..");
            }
        }
        if (log.isTraceEnabled()) {
            log.trace("Aborting: absorbing extra whitespace complete");
        }
    }

    /**
     * Computes the line number to report for the tokeniser's current position.
     * @param tokeniser stream tokeniser whose position is reported
     * @param in the reader underlying the tokeniser; when it is an {@link UnfoldingReader} the
     * number of lines it has unfolded is added to the result
     * @return the tokeniser's line number, reduced by one when the current token is an
     * end-of-line, plus any unfolded lines
     */
    private int getLineNumber(StreamTokenizer tokeniser, Reader in) {
        int line = tokeniser.lineno();
        if (tokeniser.ttype == StreamTokenizer.TT_EOL) {
            // presumably lineno() already counts the line break just consumed.
            line -= 1;
        }
        if (in instanceof UnfoldingReader) {
            // need to take unfolded lines into account
            final int unfolded = ((UnfoldingReader) in).getLinesUnfolded();
            line += unfolded;
        }
        return line;
    }

    /**
     * Reads the next token from the tokeniser.
     * This method throws a ParserException when reading EOF.
     * @param tokeniser stream tokeniser to read from
     * @param in the reader underlying the tokeniser (used for line number reporting)
     * @return the type of the token read, as returned by {@link StreamTokenizer#nextToken()}
     * @throws IOException when unable to read from stream
     * @throws ParserException when reading EOF
     */
    private int nextToken(StreamTokenizer tokeniser, Reader in) throws IOException, ParserException {
        int token = tokeniser.nextToken();
        if (token == StreamTokenizer.TT_EOF) {
            throw new ParserException("Unexpected end of file", getLineNumber(tokeniser, in));
        }
        return token;
    }
}