Tue, 10 Feb 2015 18:12:00 +0100
Import initial revisions of existing project AndroidCaldavSyncAdapater,
forked from upstream repository at 27e8a0f8495c92e0780d450bdf0c7cec77a03a55.
1 /**
2 * Copyright (c) 2012, Ben Fortuna
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * o Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * o Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * o Neither the name of Ben Fortuna nor the names of any other contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
24 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32 package net.fortuna.ical4j.data;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URISyntaxException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathException;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import net.fortuna.ical4j.model.CalendarException;
import net.fortuna.ical4j.model.Component;
import net.fortuna.ical4j.model.Date;
import net.fortuna.ical4j.model.DateTime;
import net.fortuna.ical4j.model.Parameter;
import net.fortuna.ical4j.model.Property;
import net.fortuna.ical4j.model.parameter.Value;
import net.fortuna.ical4j.model.property.Version;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
74 /**
75 * A {@link CalendarParser} that parses XHTML documents that include calendar data marked up with the hCalendar
76 * microformat.
77 * <p>
78 * The parser treats the entire document as a single "vcalendar" context, ignoring any <code>vcalendar</code> elements
79 * and adding all components in the document to a single generated calendar.
80 * </p>
81 * <p>
82 * Since hCalendar does not include product information, the <code>PRODID</code> property is omitted from the generated
83 * calendar. The hCalendar profile is supposed to define the iCalendar version that it represents, but it does not, so
84 * version 2.0 is assumed.
85 * </p>
86 * <h3>Supported Components</h3>
87 * <p>
88 * This parser recognizes only "vevent" components.
89 * </p>
90 * <h3>Supported Properties</h3>
91 * <p>
92 * This parser recognizes the following properties:
93 * </p>
94 * <ul>
95 * <li>"dtstart"</li>
96 * <li>"dtend"</li>
97 * <li>"duration"</li>
98 * <li>"summary"</li>
99 * <li>"uid"</li>
100 * <li>"dtstamp"</li>
101 * <li>"category"</li>
102 * <li>"location"</li>
103 * <li>"url"</li>
104 * <li>"description"</li>
105 * <li>"last-modified"</li>
106 * <li>"status"</li>
107 * <li>"class"</li>
108 * <li>"attendee"</li>
109 * <li>"contact"</li>
110 * <li>"organizer"</li>
111 * </ul>
112 * <p>
113 * hCalendar allows for some properties to be represented by nested microformat records, including hCard, adr and geo.
114 * This parser does not recognize these records. It simply accumulates the text content of any child elements of the
115 * property element and uses the resulting string as the property value.
116 * </p>
117 * <h4>Date and Date-Time Properties</h4>
118 * <p>
119 * hCalendar date-time values are formatted according to RFC 3339. There is no representation in this specification for
120 * time zone ids. All date-times are specified either in UTC or with an offset that can be used to convert the local
121 * time into UTC. Neither does hCal provide a reprsentation for floating date-times. Therefore, all date-time values
122 * produced by this parser are in UTC.
123 * </p>
124 * <p>
125 * Some examples in the wild provide date and date-time values in iCalendar format rather than RFC 3339 format. Although
126 * not technically legal according to spec, these values are accepted. In this case, floating date-times are produced by
127 * the parser.
128 * </p>
129 * <h3>Supported Parameters</h3>
130 * <p>
131 * hCalendar does not define attributes, nested elements or other information elements representing parameter data.
132 * Therefore, this parser does not set any property parameters except as implied by property value data (e.g.
133 * VALUE=DATE-TIME or VALUE=DATE for date-time properties).
134 * </p>
135 */
136 public class HCalendarParser implements CalendarParser {
138 private static final Log LOG = LogFactory.getLog(HCalendarParser.class);
140 private static final DocumentBuilderFactory BUILDER_FACTORY = DocumentBuilderFactory.newInstance();
141 private static final XPath XPATH = XPathFactory.newInstance().newXPath();
142 private static final XPathExpression XPATH_METHOD;
143 private static final XPathExpression XPATH_VEVENTS;
144 private static final XPathExpression XPATH_DTSTART;
145 private static final XPathExpression XPATH_DTEND;
146 private static final XPathExpression XPATH_DURATION;
147 private static final XPathExpression XPATH_SUMMARY;
148 private static final XPathExpression XPATH_UID;
149 private static final XPathExpression XPATH_DTSTAMP;
150 private static final XPathExpression XPATH_CATEGORY;
151 private static final XPathExpression XPATH_LOCATION;
152 private static final XPathExpression XPATH_URL;
153 private static final XPathExpression XPATH_DESCRIPTION;
154 private static final XPathExpression XPATH_LAST_MODIFIED;
155 private static final XPathExpression XPATH_STATUS;
156 private static final XPathExpression XPATH_CLASS;
157 private static final XPathExpression XPATH_ATTENDEE;
158 private static final XPathExpression XPATH_CONTACT;
159 private static final XPathExpression XPATH_ORGANIZER;
160 private static final XPathExpression XPATH_SEQUENCE;
161 private static final XPathExpression XPATH_ATTACH;
162 private static final String HCAL_DATE_PATTERN = "yyyy-MM-dd";
163 private static final SimpleDateFormat HCAL_DATE_FORMAT = new SimpleDateFormat(HCAL_DATE_PATTERN);
164 private static final String HCAL_DATE_TIME_PATTERN = "yyyy-MM-dd'T'HH:mm:ssz";
165 private static final SimpleDateFormat HCAL_DATE_TIME_FORMAT = new SimpleDateFormat(HCAL_DATE_TIME_PATTERN);
167 static {
168 BUILDER_FACTORY.setNamespaceAware(true);
169 BUILDER_FACTORY.setIgnoringComments(true);
171 XPATH_METHOD = compileExpression("//*[contains(@class, 'method')]");
172 XPATH_VEVENTS = compileExpression("//*[contains(@class, 'vevent')]");
173 XPATH_DTSTART = compileExpression(".//*[contains(@class, 'dtstart')]");
174 XPATH_DTEND = compileExpression(".//*[contains(@class, 'dtend')]");
175 XPATH_DURATION = compileExpression(".//*[contains(@class, 'duration')]");
176 XPATH_SUMMARY = compileExpression(".//*[contains(@class, 'summary')]");
177 XPATH_UID = compileExpression(".//*[contains(@class, 'uid')]");
178 XPATH_DTSTAMP = compileExpression(".//*[contains(@class, 'dtstamp')]");
179 XPATH_CATEGORY = compileExpression(".//*[contains(@class, 'category')]");
180 XPATH_LOCATION = compileExpression(".//*[contains(@class, 'location')]");
181 XPATH_URL = compileExpression(".//*[contains(@class, 'url')]");
182 XPATH_DESCRIPTION = compileExpression(".//*[contains(@class, 'description')]");
183 XPATH_LAST_MODIFIED = compileExpression(".//*[contains(@class, 'last-modified')]");
184 XPATH_STATUS = compileExpression(".//*[contains(@class, 'status')]");
185 XPATH_CLASS = compileExpression(".//*[contains(@class, 'class')]");
186 XPATH_ATTENDEE = compileExpression(".//*[contains(@class, 'attendee')]");
187 XPATH_CONTACT = compileExpression(".//*[contains(@class, 'contact')]");
188 XPATH_ORGANIZER = compileExpression(".//*[contains(@class, 'organizer')]");
189 XPATH_SEQUENCE = compileExpression(".//*[contains(@class, 'sequence')]");
190 XPATH_ATTACH = compileExpression(".//*[contains(@class, 'attach')]");
191 }
193 private static XPathExpression compileExpression(String expr) {
194 try {
195 return XPATH.compile(expr);
196 } catch (XPathException e) {
197 throw new CalendarException(e);
198 }
199 }
201 /**
202 * {@inheritDoc}
203 */
204 public void parse(InputStream in, ContentHandler handler) throws IOException, ParserException {
205 parse(new InputSource(in), handler);
206 }
208 /**
209 * {@inheritDoc}
210 */
211 public void parse(Reader in, ContentHandler handler) throws IOException, ParserException {
212 parse(new InputSource(in), handler);
213 }
215 private void parse(InputSource in, ContentHandler handler) throws IOException, ParserException {
216 try {
217 Document d = BUILDER_FACTORY.newDocumentBuilder().parse(in);
218 buildCalendar(d, handler);
219 } catch (ParserConfigurationException e) {
220 throw new CalendarException(e);
221 } catch (SAXException e) {
222 if (e instanceof SAXParseException) {
223 SAXParseException pe = (SAXParseException) e;
224 throw new ParserException("Could not parse XML", pe.getLineNumber(), e);
225 }
226 throw new ParserException(e.getMessage(), -1, e);
227 }
228 }
230 private static NodeList findNodes(XPathExpression expr, Object context) throws ParserException {
231 try {
232 return (NodeList) expr.evaluate(context, XPathConstants.NODESET);
233 } catch (XPathException e) {
234 throw new ParserException("Unable to find nodes", -1, e);
235 }
236 }
238 private static Node findNode(XPathExpression expr, Object context) throws ParserException {
239 try {
240 return (Node) expr.evaluate(context, XPathConstants.NODE);
241 } catch (XPathException e) {
242 throw new ParserException("Unable to find node", -1, e);
243 }
244 }
246 private static List findElements(XPathExpression expr, Object context) throws ParserException {
247 NodeList nodes = findNodes(expr, context);
248 ArrayList elements = new ArrayList();
249 for (int i = 0; i < nodes.getLength(); i++) {
250 Node n = nodes.item(i);
251 if (n instanceof Element)
252 elements.add((Element) n);
253 }
254 return elements;
255 }
257 private static Element findElement(XPathExpression expr, Object context) throws ParserException {
258 Node n = findNode(expr, context);
259 if (n == null || (!(n instanceof Element)))
260 return null;
261 return (Element) n;
262 }
264 private static String getTextContent(Element element) throws ParserException {
265 try {
266 String content = element.getFirstChild().getNodeValue();
267 if (content != null) {
268 return content.trim().replaceAll("\\s+", " ");
269 }
270 return content;
271 } catch (DOMException e) {
272 throw new ParserException("Unable to get text content for element " + element.getNodeName(), -1, e);
273 }
274 }
276 private void buildCalendar(Document d, ContentHandler handler) throws ParserException {
277 // "The root class name for hCalendar is "vcalendar". An element with a
278 // class name of "vcalendar" is itself called an hCalendar.
279 //
280 // The root class name for events is "vevent". An element with a class
281 // name of "vevent" is itself called an hCalender event.
282 //
283 // For authoring convenience, both "vevent" and "vcalendar" are
284 // treated as root class names for parsing purposes. If a document
285 // contains elements with class name "vevent" but not "vcalendar", the
286 // entire document has an implied "vcalendar" context."
288 // XXX: We assume that the entire document has a single vcalendar
289 // context. It is possible that the document contains more than one
290 // vcalendar element. In this case, we should probably only process
291 // that element and log a warning about skipping the others.
293 if (LOG.isDebugEnabled())
294 LOG.debug("Building calendar");
296 handler.startCalendar();
298 // no PRODID, as the using application should set that itself
300 handler.startProperty(Property.VERSION);
301 try {
302 handler.propertyValue(Version.VERSION_2_0.getValue());
303 } catch (Exception e) {
304 }
305 ;
306 handler.endProperty(Property.VERSION);
308 Element method = findElement(XPATH_METHOD, d);
309 if (method != null) {
310 buildProperty(method, Property.METHOD, handler);
311 }
313 List vevents = findElements(XPATH_VEVENTS, d);
314 for (Iterator i = vevents.iterator(); i.hasNext();) {
315 Element vevent = (Element) i.next();
316 buildEvent(vevent, handler);
317 }
319 // XXX: support other "first class components": vjournal, vtodo,
320 // vfreebusy, vavailability, vvenue
322 handler.endCalendar();
323 }
325 private void buildEvent(Element element, ContentHandler handler) throws ParserException {
326 if (LOG.isDebugEnabled())
327 LOG.debug("Building event");
329 handler.startComponent(Component.VEVENT);
331 buildProperty(findElement(XPATH_DTSTART, element), Property.DTSTART, handler);
332 buildProperty(findElement(XPATH_DTEND, element), Property.DTEND, handler);
333 buildProperty(findElement(XPATH_DURATION, element), Property.DURATION, handler);
334 buildProperty(findElement(XPATH_SUMMARY, element), Property.SUMMARY, handler);
335 buildProperty(findElement(XPATH_UID, element), Property.UID, handler);
336 buildProperty(findElement(XPATH_DTSTAMP, element), Property.DTSTAMP, handler);
337 List categories = findElements(XPATH_CATEGORY, element);
338 for (Iterator i = categories.iterator(); i.hasNext();) {
339 Element category = (Element) i.next();
340 buildProperty(category, Property.CATEGORIES, handler);
341 }
342 buildProperty(findElement(XPATH_LOCATION, element), Property.LOCATION, handler);
343 buildProperty(findElement(XPATH_URL, element), Property.URL, handler);
344 buildProperty(findElement(XPATH_DESCRIPTION, element), Property.DESCRIPTION, handler);
345 buildProperty(findElement(XPATH_LAST_MODIFIED, element), Property.LAST_MODIFIED, handler);
346 buildProperty(findElement(XPATH_STATUS, element), Property.STATUS, handler);
347 buildProperty(findElement(XPATH_CLASS, element), Property.CLASS, handler);
348 List attendees = findElements(XPATH_ATTENDEE, element);
349 for (Iterator i = attendees.iterator(); i.hasNext();) {
350 Element attendee = (Element) i.next();
351 buildProperty(attendee, Property.ATTENDEE, handler);
352 }
353 buildProperty(findElement(XPATH_CONTACT, element), Property.CONTACT, handler);
354 buildProperty(findElement(XPATH_ORGANIZER, element), Property.ORGANIZER, handler);
355 buildProperty(findElement(XPATH_SEQUENCE, element), Property.SEQUENCE, handler);
356 buildProperty(findElement(XPATH_ATTACH, element), Property.ATTACH, handler);
358 handler.endComponent(Component.VEVENT);
359 }
361 private void buildProperty(Element element, String propName, ContentHandler handler) throws ParserException {
362 if (element == null)
363 return;
365 if (LOG.isDebugEnabled())
366 LOG.debug("Building property " + propName);
368 String className = className(propName);
369 String elementName = element.getLocalName().toLowerCase();
371 String value = null;
372 if (elementName.equals("abbr")) {
373 // "If an <abbr> element is used for a property, then the 'title'
374 // attribute of the <abbr> element is the value of the property,
375 // instead of the contents of the element, which instead provide a
376 // human presentable version of the value."
377 value = element.getAttribute("title");
378 if (StringUtils.isBlank(value))
379 throw new ParserException("Abbr element '" + className + "' requires a non-empty title", -1);
380 if (LOG.isDebugEnabled())
381 LOG.debug("Setting value '" + value + "' from title attribute");
382 } else if (isHeaderElement(elementName)) {
383 // try title first. if that's not set, fall back to text content.
384 value = element.getAttribute("title");
385 if (!StringUtils.isBlank(value)) {
386 if (LOG.isDebugEnabled())
387 LOG.debug("Setting value '" + value + "' from title attribute");
388 } else {
389 value = getTextContent(element);
390 if (LOG.isDebugEnabled())
391 LOG.debug("Setting value '" + value + "' from text content");
392 }
393 } else if (elementName.equals("a") && isUrlProperty(propName)) {
394 value = element.getAttribute("href");
395 if (StringUtils.isBlank(value))
396 throw new ParserException("A element '" + className + "' requires a non-empty href", -1);
397 if (LOG.isDebugEnabled())
398 LOG.debug("Setting value '" + value + "' from href attribute");
399 } else if (elementName.equals("img")) {
400 if (isUrlProperty(propName)) {
401 value = element.getAttribute("src");
402 if (StringUtils.isBlank(value))
403 throw new ParserException("Img element '" + className + "' requires a non-empty src", -1);
404 if (LOG.isDebugEnabled())
405 LOG.debug("Setting value '" + value + "' from src attribute");
406 } else {
407 value = element.getAttribute("alt");
408 if (StringUtils.isBlank(value))
409 throw new ParserException("Img element '" + className + "' requires a non-empty alt", -1);
410 if (LOG.isDebugEnabled())
411 LOG.debug("Setting value '" + value + "' from alt attribute");
412 }
413 } else {
414 value = getTextContent(element);
415 if (!StringUtils.isBlank(value)) {
416 if (LOG.isDebugEnabled())
417 LOG.debug("Setting value '" + value + "' from text content");
418 }
419 }
421 if (StringUtils.isBlank(value)) {
422 if (LOG.isDebugEnabled())
423 LOG.debug("Skipping property with empty value");
424 return;
425 }
427 handler.startProperty(propName);
429 // if it's a date property, we have to convert from the
430 // hCalendar-formatted date (RFC 3339) to an iCalendar-formatted date
431 if (isDateProperty(propName)) {
432 try {
433 Date date = icalDate(value);
434 value = date.toString();
436 if (!(date instanceof DateTime))
437 try {
438 handler.parameter(Parameter.VALUE, Value.DATE.getValue());
439 } catch (Exception e) {
440 }
441 } catch (ParseException e) {
442 throw new ParserException("Malformed date value for element '" + className + "'", -1, e);
443 }
444 }
446 if (isTextProperty(propName)) {
447 String lang = element.getAttributeNS(XMLConstants.XML_NS_URI, "lang");
448 if (!StringUtils.isBlank(lang))
449 try {
450 handler.parameter(Parameter.LANGUAGE, lang);
451 } catch (Exception e) {
452 }
453 }
455 // XXX: other parameters?
457 try {
458 handler.propertyValue(value);
459 } catch (URISyntaxException e) {
460 throw new ParserException("Malformed URI value for element '" + className + "'", -1, e);
461 } catch (ParseException e) {
462 throw new ParserException("Malformed value for element '" + className + "'", -1, e);
463 } catch (IOException e) {
464 throw new CalendarException(e);
465 }
467 handler.endProperty(propName);
468 }
470 // "The basic format of hCalendar is to use iCalendar object/property
471 // names in lower-case for class names ..."
472 /*
473 * private static String _icalName(Element element) { return element.getAttribute("class").toUpperCase(); }
474 */
476 private static String className(String propName) {
477 return propName.toLowerCase();
478 }
480 private static boolean isHeaderElement(String name) {
481 return (name.equals("h1") || name.equals("h2") || name.equals("h3")
482 || name.equals("h4") || name.equals("h5") || name
483 .equals("h6"));
484 }
486 private static boolean isDateProperty(String name) {
487 return (name.equals(Property.DTSTART) || name.equals(Property.DTEND) || name.equals(Property.DTSTAMP) || name
488 .equals(Property.LAST_MODIFIED));
489 }
491 private static boolean isUrlProperty(String name) {
492 return (name.equals(Property.URL));
493 }
495 private static boolean isTextProperty(String name) {
496 return (name.equals(Property.SUMMARY) || name.equals(Property.LOCATION) || name.equals(Property.CATEGORIES)
497 || name.equals(Property.DESCRIPTION) || name.equals(Property.ATTENDEE)
498 || name.equals(Property.CONTACT) || name
499 .equals(Property.ORGANIZER));
500 }
502 private static Date icalDate(String original) throws ParseException {
503 // in the real world, some generators use iCalendar formatted
504 // dates and date-times, so try parsing those formats first before
505 // going to RFC 3339 formats
507 if (original.indexOf('T') == -1) {
508 // date-only
509 try {
510 // for some reason Date's pattern matches yyyy-MM-dd, so
511 // don't check it if we find -
512 if (original.indexOf('-') == -1)
513 return new Date(original);
514 } catch (Exception e) {
515 }
516 return new Date(HCAL_DATE_FORMAT.parse(original));
517 }
519 try {
520 return new DateTime(original);
521 } catch (Exception e) {
522 }
524 // the date-time value can represent its time zone in a few different
525 // ways. we have to normalize those to match our pattern.
527 String normalized = null;
529 if (LOG.isDebugEnabled())
530 LOG.debug("normalizing date-time " + original);
532 // 2002-10-09T19:00:00Z
533 if (original.charAt(original.length() - 1) == 'Z') {
534 normalized = original.replaceAll("Z", "GMT-00:00");
535 }
536 // 2002-10-10T00:00:00+05:00
537 else if (original.indexOf("GMT") == -1
538 && (original.charAt(original.length() - 6) == '+' || original.charAt(original.length() - 6) == '-')) {
539 String tzId = "GMT" + original.substring(original.length() - 6);
540 normalized = original.substring(0, original.length() - 6) + tzId;
541 } else {
542 // 2002-10-10T00:00:00GMT+05:00
543 normalized = original;
544 }
546 DateTime dt = new DateTime(HCAL_DATE_TIME_FORMAT.parse(normalized));
548 // hCalendar does not specify a representation for timezone ids
549 // or any other sort of timezone information. the best it does is
550 // give us a timezone offset that we can use to convert the local
551 // time to UTC. furthermore, it has no representation for floating
552 // date-times. therefore, all dates are converted to UTC.
554 dt.setUtc(true);
556 return dt;
557 }
558 }