/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * Writes a debug line, prefixed with "*** ", through the global dump().
 * @param str
 *        The message to write.
 */
function LOG(str) {
  dump("*** " + str + "\n");
}

// Shorthand for the XPCOM globals used throughout this file.
const Ci = Components.interfaces;
const Cc = Components.classes;
const Cr = Components.results;
Components.utils.import("resource://gre/modules/XPCOMUtils.jsm");

// Contract ID, class ID and class name for each component.
const FP_CONTRACTID = "@mozilla.org/feed-processor;1";
const FP_CLASSID = Components.ID("{26acb1f0-28fc-43bc-867a-a46aabc85dd4}");
const FP_CLASSNAME = "Feed Processor";
const FR_CONTRACTID = "@mozilla.org/feed-result;1";
const FR_CLASSID = Components.ID("{072a5c3d-30c6-4f07-b87f-9f63d51403f2}");
const FR_CLASSNAME = "Feed Result";
const FEED_CONTRACTID = "@mozilla.org/feed;1";
const FEED_CLASSID = Components.ID("{5d0cfa97-69dd-4e5e-ac84-f253162e8f9a}");
const FEED_CLASSNAME = "Feed";
const ENTRY_CONTRACTID = "@mozilla.org/feed-entry;1";
const ENTRY_CLASSID = Components.ID("{8e4444ff-8e99-4bdd-aa7f-fb3c1c77319f}");
const ENTRY_CLASSNAME = "Feed Entry";
const TEXTCONSTRUCT_CONTRACTID = "@mozilla.org/feed-textconstruct;1";
const TEXTCONSTRUCT_CLASSID =
  Components.ID("{b992ddcd-3899-4320-9909-924b3e72c922}");
const TEXTCONSTRUCT_CLASSNAME = "Feed Text Construct";
const GENERATOR_CONTRACTID = "@mozilla.org/feed-generator;1";
const GENERATOR_CLASSID =
  Components.ID("{414af362-9ad8-4296-898e-62247f25a20e}");
const GENERATOR_CLASSNAME = "Feed Generator";
const PERSON_CONTRACTID = "@mozilla.org/feed-person;1";
const PERSON_CLASSID = Components.ID("{95c963b7-20b2-11db-92f6-001422106990}");
const PERSON_CLASSNAME = "Feed Person";

// Contract IDs of the platform services this file instantiates.
const IO_CONTRACTID = "@mozilla.org/network/io-service;1";
const BAG_CONTRACTID = "@mozilla.org/hash-property-bag;1";
const ARRAY_CONTRACTID = "@mozilla.org/array;1";
const SAX_CONTRACTID = "@mozilla.org/saxparser/xmlreader;1";
const PARSERUTILS_CONTRACTID = "@mozilla.org/parserutils;1";


// nsIIOService, fetched on first use by strToURI.
var gIoService = null;

const XMLNS = "http://www.w3.org/XML/1998/namespace";
const RSS090NS = "http://my.netscape.com/rdf/simple/0.9/";

/***** Some general utils *****/

/**
 * Builds a URI object from a string through the IO service.
 * @param link
 *        The URI spec, possibly relative.
 * @param base
 *        Optional base URI to resolve against; any falsy value is passed
 *        on as null.
 * @return The value returned by nsIIOService.newURI, or null if it throws.
 */
function strToURI(link, base) {
  var baseURI = base || null;
  if (!gIoService)
    gIoService = Cc[IO_CONTRACTID].getService(Ci.nsIIOService);
  try {
    return gIoService.newURI(link, null, baseURI);
  }
  catch(e) {
    // An unparseable spec is reported to the caller as null.
    return null;
  }
}

/**
 * @return true if |a| is an object whose constructor is this global's Array.
 */
function isArray(a) {
  return isObject(a) && a.constructor == Array;
}

/**
 * @return true if |a| is a non-null object or a function.
 */
function isObject(a) {
  return (a && typeof a == "object") || isFunction(a);
}

/**
 * @return true if |a| is a function.
 */
function isFunction(a) {
  return typeof a == "function";
}

/**
 * Tests whether |a| can be QueryInterface'd to |iid|.
 * @return true if a.QueryInterface(iid) completes without throwing,
 *         false otherwise (including when |a| has no QueryInterface).
 */
function isIID(a, iid) {
  var rv = false;
  try {
    a.QueryInterface(iid);
    rv = true;
  }
  catch(e) {
    // Deliberately ignored: a throw simply means "not this interface".
  }
  return rv;
}

/**
 * @return true if |a| implements nsIArray.
 */
function isIArray(a) {
  return isIID(a, Ci.nsIArray);
}

/**
 * @return true if |a| implements nsIFeedContainer.
 */
function isIFeedContainer(a) {
  return isIID(a, Ci.nsIFeedContainer);
}

/**
 * Removes every "<...>" run from a string.
 * @param someHTML
 *        The markup to strip.
 * @return The input with all matches of /<[^>]+>/ deleted.
 */
function stripTags(someHTML) {
  return someHTML.replace(/<[^>]+>/g,"");
}

michael@0: /** michael@0: * Searches through an array of links and returns a JS array michael@0: * of matching property bags. michael@0: */ michael@0: const IANA_URI = "http://www.iana.org/assignments/relation/"; michael@0: function findAtomLinks(rel, links) { michael@0: var rvLinks = []; michael@0: for (var i = 0; i < links.length; ++i) { michael@0: var linkElement = links.queryElementAt(i, Ci.nsIPropertyBag2); michael@0: // atom:link MUST have @href michael@0: if (bagHasKey(linkElement, "href")) { michael@0: var relAttribute = null; michael@0: if (bagHasKey(linkElement, "rel")) michael@0: relAttribute = linkElement.getPropertyAsAString("rel") michael@0: if ((!relAttribute && rel == "alternate") || relAttribute == rel) { michael@0: rvLinks.push(linkElement); michael@0: continue; michael@0: } michael@0: // catch relations specified by IANA URI michael@0: if (relAttribute == IANA_URI + rel) { michael@0: rvLinks.push(linkElement); michael@0: } michael@0: } michael@0: } michael@0: return rvLinks; michael@0: } michael@0: michael@0: function xmlEscape(s) { michael@0: s = s.replace(/&/g, "&"); michael@0: s = s.replace(/>/g, ">"); michael@0: s = s.replace(/ 0) { michael@0: ++entries_with_enclosures; michael@0: michael@0: for (var e = 0; e < entry.enclosures.length; ++e) { michael@0: var enc = entry.enclosures.queryElementAt(e, Ci.nsIWritablePropertyBag2); michael@0: if (enc.hasKey("type")) { michael@0: var enctype = enc.get("type"); michael@0: michael@0: if (/^audio/.test(enctype)) { michael@0: ++audio_count; michael@0: } else if (/^image/.test(enctype)) { michael@0: ++image_count; michael@0: } else if (/^video/.test(enctype)) { michael@0: ++video_count; michael@0: } else { michael@0: ++other_count; michael@0: } michael@0: } else { michael@0: ++other_count; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: var feedtype = Ci.nsIFeed.TYPE_FEED; michael@0: michael@0: // For a feed to be marked as TYPE_VIDEO, TYPE_AUDIO and TYPE_IMAGE, michael@0: // we 
enforce two things: michael@0: // michael@0: // 1. all entries must have at least one enclosure michael@0: // 2. all enclosures must be video for TYPE_VIDEO, audio for TYPE_AUDIO or image michael@0: // for TYPE_IMAGE michael@0: // michael@0: // Otherwise it's a TYPE_FEED. michael@0: if (entries_with_enclosures == this.items.length && other_count == 0) { michael@0: if (audio_count > 0 && !video_count && !image_count) { michael@0: feedtype = Ci.nsIFeed.TYPE_AUDIO; michael@0: michael@0: } else if (image_count > 0 && !audio_count && !video_count) { michael@0: feedtype = Ci.nsIFeed.TYPE_IMAGE; michael@0: michael@0: } else if (video_count > 0 && !audio_count && !image_count) { michael@0: feedtype = Ci.nsIFeed.TYPE_VIDEO; michael@0: } michael@0: } michael@0: michael@0: this.type = feedtype; michael@0: this.enclosureCount = other_count + video_count + audio_count + image_count; michael@0: }, michael@0: michael@0: _atomLinksToURI: function Feed_linkToURI() { michael@0: var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray); michael@0: var alternates = findAtomLinks("alternate", links); michael@0: if (alternates.length > 0) { michael@0: var href = alternates[0].getPropertyAsAString("href"); michael@0: var base; michael@0: if (bagHasKey(alternates[0], "xml:base")) michael@0: base = alternates[0].getPropertyAsAString("xml:base"); michael@0: this.link = this._resolveURI(href, base); michael@0: } michael@0: }, michael@0: michael@0: _resolveImageLink: function Feed_resolveImageLink() { michael@0: var base; michael@0: if (bagHasKey(this.image, "xml:base")) michael@0: base = this.image.getPropertyAsAString("xml:base"); michael@0: var url = this._resolveURI(this.image.getPropertyAsAString("url"), base); michael@0: if (url) michael@0: this.image.setPropertyAsAString("url", url.spec); michael@0: }, michael@0: michael@0: _resolveURI: function Feed_resolveURI(linkSpec, baseSpec) { michael@0: var uri = null; michael@0: try { michael@0: var base = baseSpec ? 
strToURI(baseSpec, this.baseURI) : this.baseURI; michael@0: uri = strToURI(linkSpec, base); michael@0: } michael@0: catch(e) { michael@0: LOG(e); michael@0: } michael@0: michael@0: return uri; michael@0: }, michael@0: michael@0: // reset the bag to raw contents, not text constructs michael@0: _resetBagMembersToRawText: function Feed_resetBagMembers(fieldLists) { michael@0: for (var i=0; ilawyer@boyer.net (Lawyer Boyer) michael@0: // michael@0: // or, delightfully, a field like this: michael@0: // michael@0: // Simon St.Laurent (mailto:simonstl@simonstl.com) michael@0: // michael@0: // We want to split this up and assign it to corresponding Atom michael@0: // fields. michael@0: // michael@0: function rssAuthor(s,author) { michael@0: author.QueryInterface(Ci.nsIFeedPerson); michael@0: // check for RSS2 string format michael@0: var chars = s.trim(); michael@0: var matches = chars.match(/(.*)\((.*)\)/); michael@0: var emailCheck = michael@0: /^([a-zA-Z0-9_\.\-])+\@(([a-zA-Z0-9\-])+\.)+([a-zA-Z0-9]{2,4})+$/; michael@0: if (matches) { michael@0: var match1 = matches[1].trim(); michael@0: var match2 = matches[2].trim(); michael@0: if (match2.indexOf("mailto:") == 0) michael@0: match2 = match2.substring(7); michael@0: if (emailCheck.test(match1)) { michael@0: author.email = match1; michael@0: author.name = match2; michael@0: } michael@0: else if (emailCheck.test(match2)) { michael@0: author.email = match2; michael@0: author.name = match1; michael@0: } michael@0: else { michael@0: // put it back together michael@0: author.name = match1 + " (" + match2 + ")"; michael@0: } michael@0: } michael@0: else { michael@0: author.name = chars; michael@0: if (chars.indexOf('@')) michael@0: author.email = chars; michael@0: } michael@0: return author; michael@0: } michael@0: michael@0: // michael@0: // skipHours and skipDays map to arrays, so we need to change the michael@0: // string to an nsISupports in order to stick it in there. 
//
/**
 * Wraps a string in an nsISupportsString instance.
 * @param s
 *        The string to store in the instance's |data|.
 * @return The nsISupportsString.
 */
function rssArrayElement(s) {
  var str = Cc["@mozilla.org/supports-string;1"].
              createInstance(Ci.nsISupportsString);
  str.data = s;
  str.QueryInterface(Ci.nsISupportsString);
  return str;
}

/**
 * Tries parsing a string through the JavaScript Date object.
 * @param aDateString
 *        A string that is supposedly an RFC822 or RFC3339 date.
 * @return A Date.toUTCString, or null if the string can't be parsed.
 */
function dateParse(aDateString) {
  let dateString = aDateString.trim();
  // Without bug 682781 fixed, JS won't parse an RFC822 date with a Z for the
  // timezone, so convert to -00:00 which works for any date format.
  dateString = dateString.replace(/z$/i, "-00:00");
  let date = new Date(dateString);
  if (!isNaN(date)) {
    return date.toUTCString();
  }
  return null;
}

const XHTML_NS = "http://www.w3.org/1999/xhtml";

/**
 * The XHTMLHandler handles inline XHTML found in things like atom:summary.
 * It is a SAX content handler that serializes the XHTML it sees into a
 * string buffer and hands that string back to the processor.
 * @param processor
 *        Object whose returnFromXHTMLHandler() receives the result.
 * @param isAtom
 *        If true, an outermost <div> wrapper is left out of the output.
 */
function XHTMLHandler(processor, isAtom) {
  this._buf = "";
  this._processor = processor;
  this._depth = 0;
  this._isAtom = isAtom;
  // a stack of lists tracking in-scope namespaces
  this._inScopeNS = [];
}

// The fidelity can be improved here, to allow handling of stuff like
// SVG and MathML. XXX
XHTMLHandler.prototype = {

  // look back up at the declared namespaces
  // we always use the same prefixes for our safe stuff
  _isInScope: function XH__isInScope(ns) {
    // _inScopeNS is an array of arrays, one list per open element.
    for (var i = 0; i < this._inScopeNS.length; ++i) {
      if (this._inScopeNS[i].indexOf(ns) != -1)
        return true;
    }
    return false;
  },

  startDocument: function XH_startDocument() {
  },
  endDocument: function XH_endDocument() {
  },
  startElement: function XH_startElement(uri, localName, qName, attributes) {
    ++this._depth;
    this._inScopeNS.push([]);

    // RFC4287 requires XHTML to be wrapped in a div that is *not* part of
    // the content. This prevents people from screwing up namespaces, but
    // we need to skip it here.
    if (this._isAtom && this._depth == 1 && localName == "div")
      return;

    // If it's an XHTML element, record it.  Otherwise, it's ignored.
    if (uri != XHTML_NS)
      return;

    this._buf += "<" + localName;
    for (var i = 0; i < attributes.length; ++i) {
      var attrNS = attributes.getURI(i);
      // XHTML attributes aren't in a namespace
      if (attrNS == "") {
        this._buf += (" " + attributes.getLocalName(i) + "='" +
                      xmlEscape(attributes.getValue(i)) + "'");
        continue;
      }

      // write a small set of allowed attribute namespaces
      var prefix = gAllowedXHTMLNamespaces[attrNS];
      if (prefix == null)
        continue;

      // The attribute value we'll attempt to write
      var attributeValue = xmlEscape(attributes.getValue(i));

      // it's an allowed attribute NS.
      // write the attribute
      this._buf += (" " + prefix + ":" +
                    attributes.getLocalName(i) +
                    "='" + attributeValue + "'");

      // write an xmlns declaration if necessary
      if (prefix != "xml" && !this._isInScope(attrNS)) {
        this._inScopeNS[this._inScopeNS.length - 1].push(attrNS);
        this._buf += " xmlns:" + prefix + "='" + attrNS + "'";
      }
    }
    this._buf += ">";
  },
  endElement: function XH_endElement(uri, localName, qName) {
    --this._depth;
    this._inScopeNS.pop();

    // We need to skip outer divs in Atom.  See comment in startElement.
    if (this._isAtom && this._depth == 0 && localName == "div")
      return;

    // When we peek too far, go back to the main processor
    if (this._depth < 0) {
      this._processor.returnFromXHTMLHandler(this._buf.trim(),
                                             uri, localName, qName);
      return;
    }
    // If it's an XHTML element, record it.  Otherwise, it's ignored.
    // The closing tag mirrors the "<" + localName ... ">" written by
    // startElement so the buffer stays balanced.
    if (uri == XHTML_NS) {
      this._buf += "</" + localName + ">";
    }
  },
  characters: function XH_characters(data) {
    this._buf += xmlEscape(data);
  },
  startPrefixMapping: function XH_startPrefixMapping(prefix, uri) {
  },
  endPrefixMapping: function XH_endPrefixMapping(prefix) {
  },
  processingInstruction: function XH_processingInstruction() {
  },
};

/**
 * The ExtensionHandler deals with elements we haven't explicitly
 * added to our transition table in the FeedProcessor.
 * @param processor
 *        Object whose returnFromExtHandler() receives the outermost
 *        element's uri, localName, text and attributes.
 */
function ExtensionHandler(processor) {
  this._buf = "";
  this._depth = 0;
  this._hasChildElements = false;

  // The FeedProcessor
  this._processor = processor;

  // Fields of the outermost extension element.
  this._localName = null;
  this._uri = null;
  this._qName = null;
  this._attrs = null;
}

ExtensionHandler.prototype = {
  startDocument: function EH_startDocument() {
  },
  endDocument: function EH_endDocument() {
  },
  startElement: function EH_startElement(uri, localName, qName, attrs) {
    ++this._depth;

    if (this._depth == 1) {
      this._uri = uri;
      this._localName = localName;
      this._qName = qName;
      this._attrs = attrs;
    }

    // if we descend into another element, we won't send text
    this._hasChildElements = (this._depth > 1);
  },
  endElement: function EH_endElement(uri, localName, qName) {
    --this._depth;
    if (this._depth == 0) {
      // Text is only reported for elements without child elements.
      var text = this._hasChildElements ? null : this._buf.trim();
      this._processor.returnFromExtHandler(this._uri, this._localName,
                                           text, this._attrs);
    }
  },
  characters: function EH_characters(data) {
    if (!this._hasChildElements)
      this._buf += data;
  },
  startPrefixMapping: function EH_startPrefixMapping() {
  },
  endPrefixMapping: function EH_endPrefixMapping() {
  },
  processingInstruction: function EH_processingInstruction() {
  },
};


/**
 * ElementInfo is a simple container object that describes
 * some characteristics of a feed element. For example, it
 * says whether an element can be expected to appear more
 * than once inside a given entry or feed.
 */
function ElementInfo(fieldName, containerClass, closeFunc, isArray) {
  this.fieldName = fieldName;
  this.containerClass = containerClass;
  this.closeFunc = closeFunc;
  this.isArray = isArray;
  this.isWrapper = false;
}

/**
 * FeedElementInfo represents a feed element, usually the root.
 */
function FeedElementInfo(fieldName, feedVersion) {
  this.isWrapper = false;
  this.fieldName = fieldName;
  this.feedVersion = feedVersion;
}

/**
 * Some feed formats include vestigial wrapper elements that we don't
 * want to include in our object model, but we do need to keep track
 * of during parsing.
 */
function WrapperElementInfo(fieldName) {
  this.isWrapper = true;
  this.fieldName = fieldName;
}

/***** The Processor *****/

/**
 * Creates the SAX reader and the property bag for the feed, resets the
 * parse state, and builds the transition table (_trans) that maps a state
 * name and element key to an ElementInfo / FeedElementInfo /
 * WrapperElementInfo.
 */
function FeedProcessor() {
  this._reader = Cc[SAX_CONTRACTID].createInstance(Ci.nsISAXXMLReader);
  this._buf = "";
  this._feed = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
  this._handlerStack = [];
  this._xmlBaseStack = []; // sparse array keyed to nesting depth
  this._depth = 0;
  this._state = "START";
  this._result = null;
  this._extensionHandler = null;
  this._xhtmlHandler = null;
  this._haveSentResult = false;

  // The nsIFeedResultListener waiting for the parse results
  this.listener = null;

  // These elements can contain (X)HTML or plain text.
  // We keep a table here that contains their default treatment
  this._textConstructs = {"atom:title":"text",
                          "atom:summary":"text",
                          "atom:rights":"text",
                          "atom:content":"text",
                          "atom:subtitle":"text",
                          "description":"html",
                          "rss1:description":"html",
                          "dc:description":"html",
                          "content:encoded":"html",
                          "title":"text",
                          "rss1:title":"text",
                          "atom03:title":"text",
                          "atom03:tagline":"text",
                          "atom03:summary":"text",
                          "atom03:content":"text"};
  this._stack = [];

  // Container classes shared by many table entries below.
  var entryClass = Cc[ENTRY_CONTRACTID];
  var personClass = Cc[PERSON_CONTRACTID];
  var generatorClass = Cc[GENERATOR_CONTRACTID];

  this._trans = {
    "START": {
      //If we hit a root RSS element, treat as RSS2.
      "rss": new FeedElementInfo("RSS2", "rss2"),

      // If we hit an RDF element, it could be RSS1, but we can't
      // verify that until we hit a rss1:channel element.
      "rdf:RDF": new WrapperElementInfo("RDF"),

      // If we hit a Atom 1.0 element, treat as Atom 1.0.
      "atom:feed": new FeedElementInfo("Atom", "atom"),

      // Treat as Atom 0.3
      "atom03:feed": new FeedElementInfo("Atom03", "atom03"),
    },

    /********* RSS2 **********/
    "IN_RSS2": {
      "channel": new WrapperElementInfo("channel")
    },

    "IN_CHANNEL": {
      "item": new ElementInfo("items", entryClass, null, true),
      "managingEditor": new ElementInfo("authors", personClass,
                                        rssAuthor, true),
      "dc:creator": new ElementInfo("authors", personClass,
                                    rssAuthor, true),
      "dc:author": new ElementInfo("authors", personClass,
                                   rssAuthor, true),
      "dc:contributor": new ElementInfo("contributors", personClass,
                                        rssAuthor, true),
      "category": new ElementInfo("categories", null, rssCatTerm, true),
      "cloud": new ElementInfo("cloud", null, null, false),
      "image": new ElementInfo("image", null, null, false),
      "textInput": new ElementInfo("textInput", null, null, false),
      "skipDays": new ElementInfo("skipDays", null, null, false),
      "skipHours": new ElementInfo("skipHours", null, null, false),
      "generator": new ElementInfo("generator", generatorClass,
                                   atomGenerator, false),
    },

    "IN_ITEMS": {
      "author": new ElementInfo("authors", personClass,
                                rssAuthor, true),
      "dc:creator": new ElementInfo("authors", personClass,
                                    rssAuthor, true),
      "dc:author": new ElementInfo("authors", personClass,
                                   rssAuthor, true),
      "dc:contributor": new ElementInfo("contributors", personClass,
                                        rssAuthor, true),
      "category": new ElementInfo("categories", null, rssCatTerm, true),
      "enclosure": new ElementInfo("enclosure", null, null, false),
      "media:content": new ElementInfo("mediacontent", null, null, true),
      "media:group": new ElementInfo("mediagroup", null, null, false),
      "guid": new ElementInfo("guid", null, rssGuid, false)
    },

    "IN_SKIPDAYS": {
      "day": new ElementInfo("days", null, rssArrayElement, true)
    },

    "IN_SKIPHOURS":{
      "hour": new ElementInfo("hours", null, rssArrayElement, true)
    },

    "IN_MEDIAGROUP": {
      "media:content": new ElementInfo("mediacontent", null, null, true)
    },

    /********* RSS1 **********/
    "IN_RDF": {
      // If we hit a rss1:channel, we can verify that we have RSS1
      "rss1:channel": new FeedElementInfo("rdf_channel", "rss1"),
      "rss1:image": new ElementInfo("image", null, null, false),
      "rss1:textinput": new ElementInfo("textInput", null, null, false),
      "rss1:item": new ElementInfo("items", entryClass, null, true),
    },

    "IN_RDF_CHANNEL": {
      "admin:generatorAgent": new ElementInfo("generator",
                                              generatorClass,
                                              null, false),
      "dc:creator": new ElementInfo("authors", personClass,
                                    rssAuthor, true),
      "dc:author": new ElementInfo("authors", personClass,
                                   rssAuthor, true),
      "dc:contributor": new ElementInfo("contributors", personClass,
                                        rssAuthor, true),
    },

    /********* ATOM 1.0 **********/
    "IN_ATOM": {
      "atom:author": new ElementInfo("authors", personClass,
                                     null, true),
      "atom:generator": new ElementInfo("generator", generatorClass,
                                        atomGenerator, false),
      "atom:contributor": new ElementInfo("contributors", personClass,
                                          null, true),
      "atom:link": new ElementInfo("links", null, null, true),
      "atom:logo": new ElementInfo("atom:logo", null, atomLogo, false),
      "atom:entry": new ElementInfo("entries", entryClass,
                                    null, true)
    },

    "IN_ENTRIES": {
      "atom:author": new ElementInfo("authors", personClass,
                                     null, true),
      "atom:contributor": new ElementInfo("contributors", personClass,
                                          null, true),
      "atom:link": new ElementInfo("links", null, null, true),
    },

    /********* ATOM 0.3 **********/
    "IN_ATOM03": {
      "atom03:author": new ElementInfo("authors", personClass,
                                       null, true),
      "atom03:contributor": new ElementInfo("contributors",
                                            personClass,
                                            null, true),
      "atom03:link": new ElementInfo("links", null, null, true),
      "atom03:entry": new ElementInfo("atom03_entries", entryClass,
                                      null, true),
      "atom03:generator": new ElementInfo("generator", generatorClass,
                                          atomGenerator, false),
    },

    "IN_ATOM03_ENTRIES": {
      "atom03:author": new ElementInfo("authors", personClass,
                                       null, true),
      "atom03:contributor": new ElementInfo("contributors",
                                            personClass,
                                            null, true),
      "atom03:link": new ElementInfo("links", null, null, true),
      "atom03:entry": new ElementInfo("atom03_entries", entryClass,
                                      null, true)
    }
  };
}

// See startElement for a long description of how feeds are processed.
michael@0: FeedProcessor.prototype = { michael@0: michael@0: // Set ourselves as the SAX handler, and set the base URI michael@0: _init: function FP_init(uri) { michael@0: this._reader.contentHandler = this; michael@0: this._reader.errorHandler = this; michael@0: this._result = Cc[FR_CONTRACTID].createInstance(Ci.nsIFeedResult); michael@0: if (uri) { michael@0: this._result.uri = uri; michael@0: this._reader.baseURI = uri; michael@0: this._xmlBaseStack[0] = uri; michael@0: } michael@0: }, michael@0: michael@0: // This function is called once we figure out what type of feed michael@0: // we're dealing with. Some feed types require digging a bit further michael@0: // than the root. michael@0: _docVerified: function FP_docVerified(version) { michael@0: this._result.doc = Cc[FEED_CONTRACTID].createInstance(Ci.nsIFeed); michael@0: this._result.doc.baseURI = michael@0: this._xmlBaseStack[this._xmlBaseStack.length - 1]; michael@0: this._result.doc.fields = this._feed; michael@0: this._result.version = version; michael@0: }, michael@0: michael@0: // When we're done with the feed, let the listener know what michael@0: // happened. 
michael@0: _sendResult: function FP_sendResult() { michael@0: this._haveSentResult = true; michael@0: try { michael@0: // Can be null when a non-feed is fed to us michael@0: if (this._result.doc) michael@0: this._result.doc.normalize(); michael@0: } michael@0: catch (e) { michael@0: LOG("FIXME: " + e); michael@0: } michael@0: michael@0: try { michael@0: if (this.listener != null) michael@0: this.listener.handleResult(this._result); michael@0: } michael@0: finally { michael@0: this._result = null; michael@0: } michael@0: }, michael@0: michael@0: // Parsing functions michael@0: parseFromStream: function FP_parseFromStream(stream, uri) { michael@0: this._init(uri); michael@0: this._reader.parseFromStream(stream, null, stream.available(), michael@0: "application/xml"); michael@0: this._reader = null; michael@0: }, michael@0: michael@0: parseFromString: function FP_parseFromString(inputString, uri) { michael@0: this._init(uri); michael@0: this._reader.parseFromString(inputString, "application/xml"); michael@0: this._reader = null; michael@0: }, michael@0: michael@0: parseAsync: function FP_parseAsync(requestObserver, uri) { michael@0: this._init(uri); michael@0: this._reader.parseAsync(requestObserver); michael@0: }, michael@0: michael@0: // nsIStreamListener michael@0: michael@0: // The XMLReader will throw sensible exceptions if these get called michael@0: // out of order. michael@0: onStartRequest: function FP_onStartRequest(request, context) { michael@0: // this will throw if the request is not a channel, but so will nsParser. 
michael@0: var channel = request.QueryInterface(Ci.nsIChannel); michael@0: channel.contentType = "application/vnd.mozilla.maybe.feed"; michael@0: this._reader.onStartRequest(request, context); michael@0: }, michael@0: michael@0: onStopRequest: function FP_onStopRequest(request, context, statusCode) { michael@0: try { michael@0: this._reader.onStopRequest(request, context, statusCode); michael@0: } michael@0: finally { michael@0: this._reader = null; michael@0: } michael@0: }, michael@0: michael@0: onDataAvailable: michael@0: function FP_onDataAvailable(request, context, inputStream, offset, count) { michael@0: this._reader.onDataAvailable(request, context, inputStream, offset, count); michael@0: }, michael@0: michael@0: // nsISAXErrorHandler michael@0: michael@0: // We only care about fatal errors. When this happens, we may have michael@0: // parsed through the feed metadata and some number of entries. The michael@0: // listener can still show some of that data if it wants, and we'll michael@0: // set the bozo bit to indicate we were unable to parse all the way michael@0: // through. michael@0: fatalError: function FP_reportError() { michael@0: this._result.bozo = true; michael@0: //XXX need to QI to FeedProgressListener michael@0: if (!this._haveSentResult) michael@0: this._sendResult(); michael@0: }, michael@0: michael@0: // nsISAXContentHandler michael@0: michael@0: startDocument: function FP_startDocument() { michael@0: //LOG("----------"); michael@0: }, michael@0: michael@0: endDocument: function FP_endDocument() { michael@0: if (!this._haveSentResult) michael@0: this._sendResult(); michael@0: }, michael@0: michael@0: // The transitions defined above identify elements that contain more michael@0: // than just text. For example RSS items contain many fields, and so michael@0: // do Atom authors. 
  // The only commonly used elements that contain
  // mixed content are Atom Text Constructs of type="xhtml", which we
  // delegate to another handler for cleaning. That leaves a couple
  // different types of elements to deal with: those that should occur
  // only once, such as title elements, and those that can occur
  // multiple times, such as the RSS category element and the Atom
  // link element. Most of the RSS1/DC elements can occur multiple
  // times in theory, but in practice, the only ones that do have
  // analogues in Atom.
  //
  // Some elements are also groups of attributes or sub-elements,
  // while others are simple text fields. For the most part, we don't
  // have to pay explicit attention to the simple text elements,
  // unless we want to post-process the resulting string to transform
  // it into some richer object like a Date or URI.
  //
  // Elements that have more sophisticated content models still end up
  // being dictionaries, whether they are based on attributes like RSS
  // cloud, sub-elements like Atom author, or even items and
  // entries. These elements are treated as "containers". It's
  // theoretically possible for a container to have an attribute with
  // the same universal name as a sub-element, but none of the feed
  // formats allow this by default, and I don't know of any extension that
  // works this way.
michael@0: // michael@0: startElement: function FP_startElement(uri, localName, qName, attributes) { michael@0: this._buf = ""; michael@0: ++this._depth; michael@0: var elementInfo; michael@0: michael@0: //LOG("<" + localName + ">"); michael@0: michael@0: // Check for xml:base michael@0: var base = attributes.getValueFromName(XMLNS, "base"); michael@0: if (base) { michael@0: this._xmlBaseStack[this._depth] = michael@0: strToURI(base, this._xmlBaseStack[this._xmlBaseStack.length - 1]); michael@0: } michael@0: michael@0: // To identify the element we're dealing with, we look up the michael@0: // namespace URI in our gNamespaces dictionary, which will give us michael@0: // a "canonical" prefix for a namespace URI. For example, this michael@0: // allows Dublin Core "creator" elements to be consistently mapped michael@0: // to "dc:creator", for easy field access by consumer code. This michael@0: // strategy also happens to shorten up our state table. michael@0: var key = this._prefixForNS(uri) + localName; michael@0: michael@0: // Check to see if we need to hand this off to our XHTML handler. michael@0: // The elements we're dealing with will look like this: michael@0: // michael@0: // michael@0: // <div xmlns="http://www.w3.org/1999/xhtml"> michael@0: // A title with <b>bold</b> and <i>italics</i>. michael@0: // </div> michael@0: // michael@0: // michael@0: // When it returns in returnFromXHTMLHandler, the handler should michael@0: // give us back a string like this: michael@0: // michael@0: // "A title with bold and italics." michael@0: // michael@0: // The Atom spec explicitly says the div is not part of the content, michael@0: // and explicitly allows whitespace collapsing. 
michael@0: // michael@0: if ((this._result.version == "atom" || this._result.version == "atom03") && michael@0: this._textConstructs[key] != null) { michael@0: var type = attributes.getValueFromName("","type"); michael@0: if (type != null && type.indexOf("xhtml") >= 0) { michael@0: this._xhtmlHandler = michael@0: new XHTMLHandler(this, (this._result.version == "atom")); michael@0: this._reader.contentHandler = this._xhtmlHandler; michael@0: return; michael@0: } michael@0: } michael@0: michael@0: // Check our current state, and see if that state has a defined michael@0: // transition. For example, this._trans["atom:entry"]["atom:author"] michael@0: // will have one, and it tells us to add an item to our authors array. michael@0: if (this._trans[this._state] && this._trans[this._state][key]) { michael@0: elementInfo = this._trans[this._state][key]; michael@0: } michael@0: else { michael@0: // If we don't have a transition, hand off to extension handler michael@0: this._extensionHandler = new ExtensionHandler(this); michael@0: this._reader.contentHandler = this._extensionHandler; michael@0: this._extensionHandler.startElement(uri, localName, qName, attributes); michael@0: return; michael@0: } michael@0: michael@0: // This distinguishes wrappers like 'channel' from elements michael@0: // we'd actually like to do something with (which will test true). 
michael@0: this._handlerStack[this._depth] = elementInfo; michael@0: if (elementInfo.isWrapper) { michael@0: this._state = "IN_" + elementInfo.fieldName.toUpperCase(); michael@0: this._stack.push([this._feed, this._state]); michael@0: } michael@0: else if (elementInfo.feedVersion) { michael@0: this._state = "IN_" + elementInfo.fieldName.toUpperCase(); michael@0: michael@0: // Check for the older RSS2 variants michael@0: if (elementInfo.feedVersion == "rss2") michael@0: elementInfo.feedVersion = this._findRSSVersion(attributes); michael@0: else if (uri == RSS090NS) michael@0: elementInfo.feedVersion = "rss090"; michael@0: michael@0: this._docVerified(elementInfo.feedVersion); michael@0: this._stack.push([this._feed, this._state]); michael@0: this._mapAttributes(this._feed, attributes); michael@0: } michael@0: else { michael@0: this._state = this._processComplexElement(elementInfo, attributes); michael@0: } michael@0: }, michael@0: michael@0: // In the endElement handler, we decrement the stack and look michael@0: // for cleanup/transition functions to execute. The second part michael@0: // of the state transition works as above in startElement, but michael@0: // the state we're looking for is prefixed with an underscore michael@0: // to distinguish endElement events from startElement events. 
michael@0: endElement: function FP_endElement(uri, localName, qName) { michael@0: var elementInfo = this._handlerStack[this._depth]; michael@0: //LOG(""); michael@0: if (elementInfo && !elementInfo.isWrapper) michael@0: this._closeComplexElement(elementInfo); michael@0: michael@0: // cut down xml:base context michael@0: if (this._xmlBaseStack.length == this._depth + 1) michael@0: this._xmlBaseStack = this._xmlBaseStack.slice(0, this._depth); michael@0: michael@0: // our new state is whatever is at the top of the stack now michael@0: if (this._stack.length > 0) michael@0: this._state = this._stack[this._stack.length - 1][1]; michael@0: this._handlerStack = this._handlerStack.slice(0, this._depth); michael@0: --this._depth; michael@0: }, michael@0: michael@0: // Buffer up character data. The buffer is cleared with every michael@0: // opening element. michael@0: characters: function FP_characters(data) { michael@0: this._buf += data; michael@0: }, michael@0: // TODO: It would be nice to check new prefixes here, and if they michael@0: // don't conflict with the ones we've defined, throw them in a michael@0: // dictionary to check. michael@0: startPrefixMapping: function FP_startPrefixMapping(prefix, uri) { michael@0: }, michael@0: michael@0: endPrefixMapping: function FP_endPrefixMapping(prefix) { michael@0: }, michael@0: michael@0: processingInstruction: function FP_processingInstruction(target, data) { michael@0: if (target == "xml-stylesheet") { michael@0: var hrefAttribute = data.match(/href=[\"\'](.*?)[\"\']/); michael@0: if (hrefAttribute && hrefAttribute.length == 2) michael@0: this._result.stylesheet = strToURI(hrefAttribute[1], this._result.uri); michael@0: } michael@0: }, michael@0: michael@0: // end of nsISAXContentHandler michael@0: michael@0: // Handle our more complicated elements--those that contain michael@0: // attributes and child elements. 
michael@0: _processComplexElement: michael@0: function FP__processComplexElement(elementInfo, attributes) { michael@0: var obj, key, prefix; michael@0: michael@0: // If the container is an entry/item, it'll need to have its michael@0: // more esoteric properties put in the 'fields' property bag. michael@0: if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID]) { michael@0: obj = elementInfo.containerClass.createInstance(Ci.nsIFeedEntry); michael@0: obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1]; michael@0: this._mapAttributes(obj.fields, attributes); michael@0: } michael@0: else if (elementInfo.containerClass) { michael@0: obj = elementInfo.containerClass.createInstance(Ci.nsIFeedElementBase); michael@0: obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1]; michael@0: obj.attributes = attributes; // just set the SAX attributes michael@0: } michael@0: else { michael@0: obj = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2); michael@0: this._mapAttributes(obj, attributes); michael@0: } michael@0: michael@0: // We should have a container/propertyBag that's had its michael@0: // attributes processed. Now we need to attach it to its michael@0: // container. michael@0: var newProp; michael@0: michael@0: // First we'll see what's on top of the stack. michael@0: var container = this._stack[this._stack.length - 1][0]; michael@0: michael@0: // Check to see if it has the property michael@0: var prop; michael@0: try { michael@0: prop = container.getProperty(elementInfo.fieldName); michael@0: } michael@0: catch(e) { michael@0: } michael@0: michael@0: if (elementInfo.isArray) { michael@0: if (!prop) { michael@0: container.setPropertyAsInterface(elementInfo.fieldName, michael@0: Cc[ARRAY_CONTRACTID]. 
michael@0: createInstance(Ci.nsIMutableArray)); michael@0: } michael@0: michael@0: newProp = container.getProperty(elementInfo.fieldName); michael@0: // XXX This QI should not be necessary, but XPConnect seems to fly michael@0: // off the handle in the browser, and loses track of the interface michael@0: // on large files. Bug 335638. michael@0: newProp.QueryInterface(Ci.nsIMutableArray); michael@0: newProp.appendElement(obj,false); michael@0: michael@0: // If new object is an nsIFeedContainer, we want to deal with michael@0: // its member nsIPropertyBag instead. michael@0: if (isIFeedContainer(obj)) michael@0: newProp = obj.fields; michael@0: michael@0: } michael@0: else { michael@0: // If it doesn't, set it. michael@0: if (!prop) { michael@0: container.setPropertyAsInterface(elementInfo.fieldName,obj); michael@0: } michael@0: newProp = container.getProperty(elementInfo.fieldName); michael@0: } michael@0: michael@0: // make our new state name, and push the property onto the stack michael@0: var newState = "IN_" + elementInfo.fieldName.toUpperCase(); michael@0: this._stack.push([newProp, newState, obj]); michael@0: return newState; michael@0: }, michael@0: michael@0: // Sometimes we need reconcile the element content with the object michael@0: // model for a given feed. We use helper functions to do the michael@0: // munging, but we need to identify array types here, so the munging michael@0: // happens only to the last element of an array. 
michael@0: _closeComplexElement: function FP__closeComplexElement(elementInfo) { michael@0: var stateTuple = this._stack.pop(); michael@0: var container = stateTuple[0]; michael@0: var containerParent = stateTuple[2]; michael@0: var element = null; michael@0: var isArray = isIArray(container); michael@0: michael@0: // If it's an array and we have to post-process, michael@0: // grab the last element michael@0: if (isArray) michael@0: element = container.queryElementAt(container.length - 1, Ci.nsISupports); michael@0: else michael@0: element = container; michael@0: michael@0: // Run the post-processing function if there is one. michael@0: if (elementInfo.closeFunc) michael@0: element = elementInfo.closeFunc(this._buf, element); michael@0: michael@0: // If an nsIFeedContainer was on top of the stack, michael@0: // we need to normalize it michael@0: if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID]) michael@0: containerParent.normalize(); michael@0: michael@0: // If it's an array, re-set the last element michael@0: if (isArray) michael@0: container.replaceElementAt(element, container.length - 1, false); michael@0: }, michael@0: michael@0: _prefixForNS: function FP_prefixForNS(uri) { michael@0: if (!uri) michael@0: return ""; michael@0: var prefix = gNamespaces[uri]; michael@0: if (prefix) michael@0: return prefix + ":"; michael@0: if (uri.toLowerCase().indexOf("http://backend.userland.com") == 0) michael@0: return ""; michael@0: else michael@0: return null; michael@0: }, michael@0: michael@0: _mapAttributes: function FP__mapAttributes(bag, attributes) { michael@0: // Cycle through the attributes, and set our properties using the michael@0: // prefix:localNames we find in our namespace dictionary. 
michael@0: for (var i = 0; i < attributes.length; ++i) { michael@0: var key = this._prefixForNS(attributes.getURI(i)) + attributes.getLocalName(i); michael@0: var val = attributes.getValue(i); michael@0: bag.setPropertyAsAString(key, val); michael@0: } michael@0: }, michael@0: michael@0: // Only for RSS2esque formats michael@0: _findRSSVersion: function FP__findRSSVersion(attributes) { michael@0: var versionAttr = attributes.getValueFromName("", "version").trim(); michael@0: var versions = { "0.91":"rss091", michael@0: "0.92":"rss092", michael@0: "0.93":"rss093", michael@0: "0.94":"rss094" } michael@0: if (versions[versionAttr]) michael@0: return versions[versionAttr]; michael@0: if (versionAttr.substr(0,2) != "2.") michael@0: return "rssUnknown"; michael@0: return "rss2"; michael@0: }, michael@0: michael@0: // unknown element values are returned here. See startElement above michael@0: // for how this works. michael@0: returnFromExtHandler: michael@0: function FP_returnExt(uri, localName, chars, attributes) { michael@0: --this._depth; michael@0: michael@0: // take control of the SAX events michael@0: this._reader.contentHandler = this; michael@0: if (localName == null && chars == null) michael@0: return; michael@0: michael@0: // we don't take random elements inside rdf:RDF michael@0: if (this._state == "IN_RDF") michael@0: return; michael@0: michael@0: // Grab the top of the stack michael@0: var top = this._stack[this._stack.length - 1]; michael@0: if (!top) michael@0: return; michael@0: michael@0: var container = top[0]; michael@0: // Grab the last element if it's an array michael@0: if (isIArray(container)) { michael@0: var contract = this._handlerStack[this._depth].containerClass; michael@0: // check if it's something specific, but not an entry michael@0: if (contract && contract != Cc[ENTRY_CONTRACTID]) { michael@0: var el = container.queryElementAt(container.length - 1, michael@0: Ci.nsIFeedElementBase); michael@0: // XXX there must be a way to flatten these 
interfaces michael@0: if (contract == Cc[PERSON_CONTRACTID]) michael@0: el.QueryInterface(Ci.nsIFeedPerson); michael@0: else michael@0: return; // don't know about this interface michael@0: michael@0: var propName = localName; michael@0: var prefix = gNamespaces[uri]; michael@0: michael@0: // synonyms michael@0: if ((uri == "" || michael@0: prefix && michael@0: ((prefix.indexOf("atom") > -1) || michael@0: (prefix.indexOf("rss") > -1))) && michael@0: (propName == "url" || propName == "href")) michael@0: propName = "uri"; michael@0: michael@0: try { michael@0: if (el[propName] !== "undefined") { michael@0: var propValue = chars; michael@0: // convert URI-bearing values to an nsIURI michael@0: if (propName == "uri") { michael@0: var base = this._xmlBaseStack[this._xmlBaseStack.length - 1]; michael@0: propValue = strToURI(chars, base); michael@0: } michael@0: el[propName] = propValue; michael@0: } michael@0: } michael@0: catch(e) { michael@0: // ignore XPConnect errors michael@0: } michael@0: // the rest of the function deals with entry- and feed-level stuff michael@0: return; michael@0: } michael@0: else { michael@0: container = container.queryElementAt(container.length - 1, michael@0: Ci.nsIWritablePropertyBag2); michael@0: } michael@0: } michael@0: michael@0: // Make the buffer our new property michael@0: var propName = this._prefixForNS(uri) + localName; michael@0: michael@0: // But, it could be something containing HTML. If so, michael@0: // we need to know about that. michael@0: if (this._textConstructs[propName] != null && michael@0: this._handlerStack[this._depth].containerClass !== null) { michael@0: var newProp = Cc[TEXTCONSTRUCT_CONTRACTID]. 
michael@0: createInstance(Ci.nsIFeedTextConstruct); michael@0: newProp.text = chars; michael@0: // Look up the default type in our table michael@0: var type = this._textConstructs[propName]; michael@0: var typeAttribute = attributes.getValueFromName("","type"); michael@0: if (this._result.version == "atom" && typeAttribute != null) { michael@0: type = typeAttribute; michael@0: } michael@0: else if (this._result.version == "atom03" && typeAttribute != null) { michael@0: if (typeAttribute.toLowerCase().indexOf("xhtml") >= 0) { michael@0: type = "xhtml"; michael@0: } michael@0: else if (typeAttribute.toLowerCase().indexOf("html") >= 0) { michael@0: type = "html"; michael@0: } michael@0: else if (typeAttribute.toLowerCase().indexOf("text") >= 0) { michael@0: type = "text"; michael@0: } michael@0: } michael@0: michael@0: // If it's rss feed-level description, it's not supposed to have html michael@0: if (this._result.version.indexOf("rss") >= 0 && michael@0: this._handlerStack[this._depth].containerClass != ENTRY_CONTRACTID) { michael@0: type = "text"; michael@0: } michael@0: newProp.type = type; michael@0: newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1]; michael@0: container.setPropertyAsInterface(propName, newProp); michael@0: } michael@0: else { michael@0: container.setPropertyAsAString(propName, chars); michael@0: } michael@0: }, michael@0: michael@0: // Sometimes, we'll hand off SAX handling duties to an XHTMLHandler michael@0: // (see above) that will scrape out non-XHTML stuff, normalize michael@0: // namespaces, and remove the wrapper div from Atom 1.0. When the michael@0: // XHTMLHandler is done, it'll callback here. 
michael@0: returnFromXHTMLHandler: michael@0: function FP_returnFromXHTMLHandler(chars, uri, localName, qName) { michael@0: // retake control of the SAX content events michael@0: this._reader.contentHandler = this; michael@0: michael@0: // Grab the top of the stack michael@0: var top = this._stack[this._stack.length - 1]; michael@0: if (!top) michael@0: return; michael@0: var container = top[0]; michael@0: michael@0: // Assign the property michael@0: var newProp = newProp = Cc[TEXTCONSTRUCT_CONTRACTID]. michael@0: createInstance(Ci.nsIFeedTextConstruct); michael@0: newProp.text = chars; michael@0: newProp.type = "xhtml"; michael@0: newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1]; michael@0: container.setPropertyAsInterface(this._prefixForNS(uri) + localName, michael@0: newProp); michael@0: michael@0: // XHTML will cause us to peek too far. The XHTML handler will michael@0: // send us an end element to call. RFC4287-valid feeds allow a michael@0: // more graceful way to handle this. Unfortunately, we can't count michael@0: // on compliance at this point. michael@0: this.endElement(uri, localName, qName); michael@0: }, michael@0: michael@0: // XPCOM stuff michael@0: classID: FP_CLASSID, michael@0: QueryInterface: XPCOMUtils.generateQI( michael@0: [Ci.nsIFeedProcessor, Ci.nsISAXContentHandler, Ci.nsISAXErrorHandler, michael@0: Ci.nsIStreamListener, Ci.nsIRequestObserver] michael@0: ) michael@0: } michael@0: michael@0: var components = [FeedProcessor, FeedResult, Feed, Entry, michael@0: TextConstruct, Generator, Person]; michael@0: michael@0: this.NSGetFactory = XPCOMUtils.generateNSGetFactory(components);