toolkit/components/feeds/FeedProcessor.js

Fri, 16 Jan 2015 18:13:44 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 18:13:44 +0100
branch
TOR_BUG_9701
changeset 14
925c144e1f1f
permissions
-rw-r--r--

Integrate suggestion from review to improve consistency with existing code.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     /**
      * Emits a diagnostic message through dump(), prefixed with "*** " and
      * terminated by a newline.
      *
      * @param str The message to write.
      */
     function LOG(str) {
       var line = "*** " + str + "\n";
       dump(line);
     }
    10 const Ci = Components.interfaces;
    11 const Cc = Components.classes;
    12 const Cr = Components.results;
    13 Components.utils.import("resource://gre/modules/XPCOMUtils.jsm");
    // XPCOM registration data (contract ID, class ID, class name) for the
    // feed components.
    const FP_CONTRACTID = "@mozilla.org/feed-processor;1";
    const FP_CLASSID = Components.ID("{26acb1f0-28fc-43bc-867a-a46aabc85dd4}");
    const FP_CLASSNAME = "Feed Processor";
    const FR_CONTRACTID = "@mozilla.org/feed-result;1";
    const FR_CLASSID = Components.ID("{072a5c3d-30c6-4f07-b87f-9f63d51403f2}");
    const FR_CLASSNAME = "Feed Result";
    const FEED_CONTRACTID = "@mozilla.org/feed;1";
    const FEED_CLASSID = Components.ID("{5d0cfa97-69dd-4e5e-ac84-f253162e8f9a}");
    const FEED_CLASSNAME = "Feed";
    const ENTRY_CONTRACTID = "@mozilla.org/feed-entry;1";
    const ENTRY_CLASSID = Components.ID("{8e4444ff-8e99-4bdd-aa7f-fb3c1c77319f}");
    const ENTRY_CLASSNAME = "Feed Entry";
    const TEXTCONSTRUCT_CONTRACTID = "@mozilla.org/feed-textconstruct;1";
    const TEXTCONSTRUCT_CLASSID =
      Components.ID("{b992ddcd-3899-4320-9909-924b3e72c922}");
    const TEXTCONSTRUCT_CLASSNAME = "Feed Text Construct";
    const GENERATOR_CONTRACTID = "@mozilla.org/feed-generator;1";
    const GENERATOR_CLASSID =
      Components.ID("{414af362-9ad8-4296-898e-62247f25a20e}");
    const GENERATOR_CLASSNAME = "Feed Generator";
    const PERSON_CONTRACTID = "@mozilla.org/feed-person;1";
    const PERSON_CLASSID = Components.ID("{95c963b7-20b2-11db-92f6-001422106990}");
    const PERSON_CLASSNAME = "Feed Person";

    // Contract IDs of the platform services and helper objects this file
    // instantiates. Each statement is explicitly terminated instead of relying
    // on automatic semicolon insertion.
    const IO_CONTRACTID = "@mozilla.org/network/io-service;1";
    const BAG_CONTRACTID = "@mozilla.org/hash-property-bag;1";
    const ARRAY_CONTRACTID = "@mozilla.org/array;1";
    const SAX_CONTRACTID = "@mozilla.org/saxparser/xmlreader;1";
    const PARSERUTILS_CONTRACTID = "@mozilla.org/parserutils;1";

    // Cached IO service instance; populated on first use by strToURI().
    var gIoService = null;

    // Namespace URIs for the xml: prefix and for RSS 0.90.
    const XMLNS = "http://www.w3.org/XML/1998/namespace";
    const RSS090NS = "http://my.netscape.com/rdf/simple/0.9/";
    51 /***** Some general utils *****/
    /**
     * Builds a URI object from a spec, optionally resolved against a base.
     * The IO service is fetched on first use and cached in gIoService.
     *
     * @param link The URI spec to parse.
     * @param base Optional base URI; any falsy value is passed on as null.
     * @returns Whatever newURI() returns, or null if newURI() threw.
     */
    function strToURI(link, base) {
      var baseURI = base ? base : null;
      if (!gIoService) {
        gIoService = Cc[IO_CONTRACTID].getService(Ci.nsIIOService);
      }
      var uri = null;
      try {
        uri = gIoService.newURI(link, null, baseURI);
      }
      catch (e) {
        // An unparsable spec is reported to the caller as null.
        uri = null;
      }
      return uri;
    }
    /**
     * Tests whether a value is a JavaScript array.
     *
     * Array.isArray() is used instead of comparing a.constructor against this
     * global's Array: the constructor comparison rejects arrays created in a
     * different global and accepts any object that merely exposes a
     * `constructor` property equal to Array.
     *
     * @param a Any value.
     * @returns true if a is an array, false otherwise.
     */
    function isArray(a) {
      return Array.isArray(a);
    }
    /**
     * Reports whether a value is object-like, i.e. a non-null object or a
     * function.
     *
     * @param a Any value.
     * @returns true for functions and non-null objects, false otherwise.
     */
    function isObject(a) {
      if (isFunction(a)) {
        return true;
      }
      return Boolean(a) && typeof a == "object";
    }
    /**
     * Reports whether a value is callable according to typeof.
     *
     * @param a Any value.
     * @returns true if typeof a is "function".
     */
    function isFunction(a) {
      var kind = typeof a;
      return kind == "function";
    }
    /**
     * Checks whether an object can be queried to the given interface.
     *
     * @param a   The object to probe.
     * @param iid The interface to pass to a.QueryInterface().
     * @returns true if QueryInterface() completed, false if anything threw
     *          (including a not being an object with QueryInterface).
     */
    function isIID(a, iid) {
      try {
        a.QueryInterface(iid);
      }
      catch (e) {
        return false;
      }
      return true;
    }
    /**
     * Checks whether an object supports nsIArray.
     *
     * @param a The object to probe.
     * @returns The result of isIID(a, Ci.nsIArray).
     */
    function isIArray(a) {
      var arrayIID = Ci.nsIArray;
      return isIID(a, arrayIID);
    }
    /**
     * Checks whether an object supports nsIFeedContainer.
     *
     * @param a The object to probe.
     * @returns The result of isIID(a, Ci.nsIFeedContainer).
     */
    function isIFeedContainer(a) {
      var containerIID = Ci.nsIFeedContainer;
      return isIID(a, containerIID);
    }
    /**
     * Removes every "<...>" run (a "<", one or more non-">" characters, then
     * ">") from a string.
     *
     * @param someHTML The markup string to strip.
     * @returns The string with all such runs deleted.
     */
    function stripTags(someHTML) {
      var tagPattern = /<[^>]+>/g;
      return someHTML.replace(tagPattern, "");
    }
    // Prefix under which IANA-registered link relations may be spelled out
    // as full URIs.
    const IANA_URI = "http://www.iana.org/assignments/relation/";

    /**
     * Searches through an array of links and returns a JS array
     * of matching property bags.
     *
     * A link matches when its "rel" equals |rel|, when its "rel" equals the
     * IANA URI form of |rel|, or when it has no (or an empty) "rel" and |rel|
     * is "alternate". Links without an "href" are never returned.
     *
     * @param rel   The relation name to look for, e.g. "alternate".
     * @param links An array-like exposing length and queryElementAt().
     * @returns A JS array of the matching property bags, in input order.
     */
    function findAtomLinks(rel, links) {
      var matches = [];
      for (var idx = 0; idx < links.length; ++idx) {
        var bag = links.queryElementAt(idx, Ci.nsIPropertyBag2);
        // atom:link MUST have @href
        if (!bagHasKey(bag, "href")) {
          continue;
        }
        var relValue = bagHasKey(bag, "rel") ? bag.getPropertyAsAString("rel")
                                             : null;
        var impliedAlternate = !relValue && rel == "alternate";
        // the last test catches relations specified by IANA URI
        if (impliedAlternate || relValue == rel || relValue == IANA_URI + rel) {
          matches.push(bag);
        }
      }
      return matches;
    }
   /**
    * Escapes the five XML special characters in a string. "&" is handled
    * first so the entities produced by later steps are not escaped again.
    *
    * @param s The string to escape.
    * @returns The escaped string.
    */
   function xmlEscape(s) {
     var replacements = [
       [/&/g, "&amp;"],
       [/>/g, "&gt;"],
       [/</g, "&lt;"],
       [/"/g, "&quot;"],
       [/'/g, "&apos;"]
     ];
     var escaped = s;
     for (var i = 0; i < replacements.length; ++i) {
       escaped = escaped.replace(replacements[i][0], replacements[i][1]);
     }
     return escaped;
   }
   /**
    * Linear search for an element using loose (==) equality.
    *
    * @param array   An array or array-like with a length.
    * @param element The value to look for.
    * @returns true if some array[i] == element, false otherwise.
    */
   function arrayContains(array, element) {
     var idx = 0;
     var found = false;
     while (!found && idx < array.length) {
       found = (array[idx] == element);
       ++idx;
     }
     return found;
   }
   // XXX add hasKey to nsIPropertyBag
   /**
    * Tests whether a property bag holds a key by attempting to read it.
    *
    * @param bag The bag to probe; its getProperty() is called with |key|.
    * @param key The property name.
    * @returns true if getProperty() completed, false if it (or the lookup of
    *          getProperty itself) threw.
    */
   function bagHasKey(bag, key) {
     var present = true;
     try {
       bag.getProperty(key);
     }
     catch (e) {
       present = false;
     }
     return present;
   }
   /**
    * Creates an accessor that reads one fixed key out of a property bag.
    *
    * The previous body returned through `value = bag.getProperty(key)`, which
    * assigned to an undeclared variable: in sloppy mode that leaks a global
    * named `value`, and in strict code the assignment throws, is swallowed by
    * the catch, and makes the getter always return null.
    *
    * @param key The property name the returned function will look up.
    * @returns A function taking a bag and returning bag.getProperty(key), or
    *          null when the lookup throws.
    */
   function makePropGetter(key) {
     return function FeedPropGetter(bag) {
       try {
         return bag.getProperty(key);
       }
       catch (e) {
         // A missing key is expected here; report it as null.
       }
       return null;
     };
   }
   // Namespace URI of RDF syntax.
   const RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

   // Namespace map: namespace URI -> the short prefix used for it in this
   // file. Two RSS namespaces map to the empty prefix, and both spellings of
   // the Yahoo media namespace map to "media".
   var gNamespaces = {
     "http://webns.net/mvcb/": "admin",
     "http://backend.userland.com/rss": "",
     "http://blogs.law.harvard.edu/tech/rss": "",
     "http://www.w3.org/2005/Atom": "atom",
     "http://purl.org/atom/ns#": "atom03",
     "http://purl.org/rss/1.0/modules/content/": "content",
     "http://purl.org/dc/elements/1.1/": "dc",
     "http://purl.org/dc/terms/": "dcterms",
     "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
     "http://purl.org/rss/1.0/": "rss1",
     "http://my.netscape.com/rdf/simple/0.9/": "rss1",
     "http://wellformedweb.org/CommentAPI/": "wfw",
     "http://purl.org/rss/1.0/modules/wiki/": "wiki",
     "http://www.w3.org/XML/1998/namespace": "xml",
     "http://search.yahoo.com/mrss/": "media",
     "http://search.yahoo.com/mrss": "media"
   };
   // The very small set of namespaces permitted inside XHTML content.
   // They are honoured for attributes only.
   var gAllowedXHTMLNamespaces = {
     "http://www.w3.org/XML/1998/namespace": "xml",
     // if someone ns qualifies XHTML, we have to prefix it to avoid an
     // attribute collision.
     "http://www.w3.org/1999/xhtml": "xhtml"
   };
   /**
    * Result object for a feed parse. The constructor does nothing; all state
    * starts from the defaults on the prototype.
    */
   function FeedResult() {}

   FeedResult.prototype = {
     // Default state shared by every instance until overwritten.
     bozo: false,
     doc: null,
     version: null,
     headers: null,
     uri: null,
     stylesheet: null,

     // Not supported: always throws NS_ERROR_NOT_IMPLEMENTED.
     registerExtensionPrefix: function FR_registerExtensionPrefix(ns, prefix) {
       throw Cr.NS_ERROR_NOT_IMPLEMENTED;
     },

     // XPCOM stuff
     classID: FR_CLASSID,
     QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeedResult])
   };
   /**
    * Feed-level container. Scalar members start out null, the list members
    * (items, authors, contributors) start as fresh mutable arrays, and the
    * feed is typed as a plain TYPE_FEED with no enclosures.
    */
   function Feed() {
     // One new nsIMutableArray per call.
     var newArray = function () {
       return Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
     };

     this.subtitle = null;
     this.title = null;
     this.items = newArray();
     this.link = null;
     this.id = null;
     this.generator = null;
     this.authors = newArray();
     this.contributors = newArray();
     this.baseURI = null;
     this.enclosureCount = 0;
     this.type = Ci.nsIFeed.TYPE_FEED;
   }
   228 Feed.prototype = {
   229   searchLists: {
   230     title: ["title", "rss1:title", "atom03:title", "atom:title"],
   231     subtitle: ["description","dc:description","rss1:description",
   232                "atom03:tagline","atom:subtitle"],
   233     items: ["items","atom03_entries","entries"],
   234     id: ["atom:id","rdf:about"],
   235     generator: ["generator"],
   236     authors : ["authors"],
   237     contributors: ["contributors"],
   238     title: ["title","rss1:title", "atom03:title","atom:title"],
   239     link:  [["link",strToURI],["rss1:link",strToURI]],
   240     categories: ["categories", "dc:subject"],
   241     rights: ["atom03:rights","atom:rights"],
   242     cloud: ["cloud"],
   243     image: ["image", "rss1:image", "atom:logo"],
   244     textInput: ["textInput", "rss1:textinput"],
   245     skipDays: ["skipDays"],
   246     skipHours: ["skipHours"],
   247     updated: ["pubDate", "lastBuildDate", "atom03:modified", "dc:date",
   248               "dcterms:modified", "atom:updated"]
   249   },
   251   normalize: function Feed_normalize() {
   252     fieldsToObj(this, this.searchLists);
   253     if (this.skipDays)
   254       this.skipDays = this.skipDays.getProperty("days");
   255     if (this.skipHours)
   256       this.skipHours = this.skipHours.getProperty("hours");
   258     if (this.updated)
   259       this.updated = dateParse(this.updated);
   261     // Assign Atom link if needed
   262     if (bagHasKey(this.fields, "links"))
   263       this._atomLinksToURI();
   265     this._calcEnclosureCountAndFeedType();
   267     // Resolve relative image links
   268     if (this.image && bagHasKey(this.image, "url"))
   269       this._resolveImageLink();
   271     this._resetBagMembersToRawText([this.searchLists.subtitle, 
   272                                     this.searchLists.title]);
   273   },
   275   _calcEnclosureCountAndFeedType: function Feed_calcEnclosureCountAndFeedType() {
   276     var entries_with_enclosures = 0;
   277     var audio_count = 0;
   278     var image_count = 0;
   279     var video_count = 0;
   280     var other_count = 0;
   282     for (var i = 0; i < this.items.length; ++i) {
   283       var entry = this.items.queryElementAt(i, Ci.nsIFeedEntry);
   284       entry.QueryInterface(Ci.nsIFeedContainer);
   286       if (entry.enclosures && entry.enclosures.length > 0) {
   287         ++entries_with_enclosures;
   289         for (var e = 0; e < entry.enclosures.length; ++e) {
   290           var enc = entry.enclosures.queryElementAt(e, Ci.nsIWritablePropertyBag2);
   291           if (enc.hasKey("type")) {
   292             var enctype = enc.get("type");
   294             if (/^audio/.test(enctype)) {
   295               ++audio_count;
   296             } else if (/^image/.test(enctype)) {
   297               ++image_count;
   298             } else if (/^video/.test(enctype)) {
   299               ++video_count;
   300             } else {
   301               ++other_count;
   302             }
   303           } else {
   304             ++other_count;
   305           }
   306         }
   307       }
   308     }
   310     var feedtype = Ci.nsIFeed.TYPE_FEED;
   312     // For a feed to be marked as TYPE_VIDEO, TYPE_AUDIO and TYPE_IMAGE, 
   313     // we enforce two things:
   314     //
   315     //    1. all entries must have at least one enclosure
   316     //    2. all enclosures must be video for TYPE_VIDEO, audio for TYPE_AUDIO or image
   317     //       for TYPE_IMAGE
   318     //
   319     // Otherwise it's a TYPE_FEED.
   320     if (entries_with_enclosures == this.items.length && other_count == 0) {
   321       if (audio_count > 0 && !video_count && !image_count) {
   322         feedtype = Ci.nsIFeed.TYPE_AUDIO;
   324       } else if (image_count > 0 && !audio_count && !video_count) {
   325         feedtype = Ci.nsIFeed.TYPE_IMAGE;
   327       } else if (video_count > 0 && !audio_count && !image_count) {
   328         feedtype = Ci.nsIFeed.TYPE_VIDEO;
   329       }
   330     }
   332     this.type = feedtype;
   333     this.enclosureCount = other_count + video_count + audio_count + image_count;
   334   },
   336   _atomLinksToURI: function Feed_linkToURI() {
   337     var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray);
   338     var alternates = findAtomLinks("alternate", links);
   339     if (alternates.length > 0) {
   340       var href = alternates[0].getPropertyAsAString("href");
   341       var base;
   342       if (bagHasKey(alternates[0], "xml:base"))
   343         base = alternates[0].getPropertyAsAString("xml:base");
   344       this.link = this._resolveURI(href, base);
   345     }
   346   },
   348   _resolveImageLink: function Feed_resolveImageLink() {
   349     var base;
   350     if (bagHasKey(this.image, "xml:base"))
   351       base = this.image.getPropertyAsAString("xml:base");
   352     var url = this._resolveURI(this.image.getPropertyAsAString("url"), base);
   353     if (url)
   354       this.image.setPropertyAsAString("url", url.spec);
   355   },
   357   _resolveURI: function Feed_resolveURI(linkSpec, baseSpec) {
   358     var uri = null;
   359     try {
   360       var base = baseSpec ? strToURI(baseSpec, this.baseURI) : this.baseURI;
   361       uri = strToURI(linkSpec, base);
   362     }
   363     catch(e) {
   364       LOG(e);
   365     }
   367     return uri;
   368   },
   370   // reset the bag to raw contents, not text constructs
   371   _resetBagMembersToRawText: function Feed_resetBagMembers(fieldLists) {
   372     for (var i=0; i<fieldLists.length; i++) {      
   373       for (var j=0; j<fieldLists[i].length; j++) {
   374         if (bagHasKey(this.fields, fieldLists[i][j])) {
   375           var textConstruct = this.fields.getProperty(fieldLists[i][j]);
   376           this.fields.setPropertyAsAString(fieldLists[i][j],
   377                                            textConstruct.text);
   378         }
   379       }
   380     }
   381   },
   383   // XPCOM stuff
   384   classID: FEED_CLASSID,
   385   QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeed, Ci.nsIFeedContainer])
   386 }
   /**
    * Entry-level container. Scalar members start out null; fields is a fresh
    * writable property bag and authors/contributors are fresh mutable arrays.
    *
    * The property bag is created through BAG_CONTRACTID, like every other bag
    * in this file, instead of repeating the contract ID string inline.
    */
   function Entry() {
     this.summary = null;
     this.content = null;
     this.title = null;
     this.fields = Cc[BAG_CONTRACTID].
       createInstance(Ci.nsIWritablePropertyBag2);
     this.link = null;
     this.id = null;
     this.baseURI = null;
     this.updated = null;
     this.published = null;
     this.authors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
     this.contributors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
   }
   403 Entry.prototype = {
   404   fields: null,
   405   enclosures: null,
   406   mediaContent: null,
   408   searchLists: {
   409     title: ["title", "rss1:title", "atom03:title", "atom:title"],
   410     link: [["link",strToURI],["rss1:link",strToURI]],
   411     id: [["guid", makePropGetter("guid")], "rdf:about",
   412          "atom03:id", "atom:id"],
   413     authors : ["authors"],
   414     contributors: ["contributors"],
   415     summary: ["description", "rss1:description", "dc:description",
   416               "atom03:summary", "atom:summary"],
   417     content: ["content:encoded","atom03:content","atom:content"],
   418     rights: ["atom03:rights","atom:rights"],
   419     published: ["pubDate", "atom03:issued", "dcterms:issued", "atom:published"],
   420     updated: ["pubDate", "atom03:modified", "dc:date", "dcterms:modified",
   421               "atom:updated"]
   422   },
   424   normalize: function Entry_normalize() {
   425     fieldsToObj(this, this.searchLists);
   427     // Assign Atom link if needed
   428     if (bagHasKey(this.fields, "links"))
   429       this._atomLinksToURI();
   431     // Populate enclosures array
   432     this._populateEnclosures();
   434     // The link might be a guid w/ permalink=true
   435     if (!this.link && bagHasKey(this.fields, "guid")) {
   436       var guid = this.fields.getProperty("guid");
   437       var isPermaLink = true;
   439       if (bagHasKey(guid, "isPermaLink"))
   440         isPermaLink = guid.getProperty("isPermaLink").toLowerCase() != "false";
   442       if (guid && isPermaLink)
   443         this.link = strToURI(guid.getProperty("guid"));
   444     }
   446     if (this.updated)
   447       this.updated = dateParse(this.updated);
   448     if (this.published)
   449       this.published = dateParse(this.published);
   451     this._resetBagMembersToRawText([this.searchLists.content, 
   452                                     this.searchLists.summary, 
   453                                     this.searchLists.title]);
   454   },
   456   _populateEnclosures: function Entry_populateEnclosures() {
   457     if (bagHasKey(this.fields, "links"))
   458       this._atomLinksToEnclosures();
   460     // Add RSS2 enclosure to enclosures
   461     if (bagHasKey(this.fields, "enclosure"))
   462       this._enclosureToEnclosures();
   464     // Add media:content to enclosures
   465     if (bagHasKey(this.fields, "mediacontent"))
   466       this._mediacontentToEnclosures();
   468     // Add media:content in media:group to enclosures
   469     if (bagHasKey(this.fields, "mediagroup"))
   470       this._mediagroupToEnclosures();
   471   },
   473   __enclosure_map: null,
   475   _addToEnclosures: function Entry_addToEnclosures(new_enc) {
   476     // items we add to the enclosures array get displayed in the FeedWriter and
   477     // they must have non-empty urls.
   478     if (!bagHasKey(new_enc, "url") || new_enc.getPropertyAsAString("url") == "")
   479       return;
   481     if (this.__enclosure_map == null)
   482       this.__enclosure_map = {};
   484     var previous_enc = this.__enclosure_map[new_enc.getPropertyAsAString("url")];
   486     if (previous_enc != undefined) {
   487       previous_enc.QueryInterface(Ci.nsIWritablePropertyBag2);
   489       if (!bagHasKey(previous_enc, "type") && bagHasKey(new_enc, "type"))
   490         previous_enc.setPropertyAsAString("type", new_enc.getPropertyAsAString("type"));
   492       if (!bagHasKey(previous_enc, "length") && bagHasKey(new_enc, "length"))
   493         previous_enc.setPropertyAsAString("length", new_enc.getPropertyAsAString("length"));
   495       return;
   496     }
   498     if (this.enclosures == null) {
   499       this.enclosures = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
   500       this.enclosures.QueryInterface(Ci.nsIMutableArray);
   501     }
   503     this.enclosures.appendElement(new_enc, false);
   504     this.__enclosure_map[new_enc.getPropertyAsAString("url")] = new_enc;
   505   },
   507   _atomLinksToEnclosures: function Entry_linkToEnclosure() {
   508     var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray);
   509     var enc_links = findAtomLinks("enclosure", links);
   510     if (enc_links.length == 0)
   511       return;
   513     for (var i = 0; i < enc_links.length; ++i) {
   514       var link = enc_links[i];
   516       // an enclosure must have an href
   517       if (!(link.getProperty("href")))
   518         return;
   520       var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
   522       // copy Atom bits over to equivalent enclosure bits
   523       enc.setPropertyAsAString("url", link.getPropertyAsAString("href"));
   524       if (bagHasKey(link, "type"))
   525         enc.setPropertyAsAString("type", link.getPropertyAsAString("type"));
   526       if (bagHasKey(link, "length"))
   527         enc.setPropertyAsAString("length", link.getPropertyAsAString("length"));
   529       this._addToEnclosures(enc);
   530     }
   531   },
   533   _enclosureToEnclosures: function Entry_enclosureToEnclosures() {
   534     var enc = this.fields.getPropertyAsInterface("enclosure", Ci.nsIPropertyBag2);
   536     if (!(enc.getProperty("url")))
   537       return;
   539     this._addToEnclosures(enc);
   540   },
   542   _mediacontentToEnclosures: function Entry_mediacontentToEnclosures() {
   543     var mediacontent = this.fields.getPropertyAsInterface("mediacontent", Ci.nsIArray);
   545     for (var i = 0; i < mediacontent.length; ++i) {
   546       var contentElement = mediacontent.queryElementAt(i, Ci.nsIWritablePropertyBag2);
   548       // media:content don't require url, but if it's not there, we should
   549       // skip it.
   550       if (!bagHasKey(contentElement, "url"))
   551         continue;
   553       var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
   555       // copy media:content bits over to equivalent enclosure bits
   556       enc.setPropertyAsAString("url", contentElement.getPropertyAsAString("url"));
   557       if (bagHasKey(contentElement, "type")) {
   558         enc.setPropertyAsAString("type", contentElement.getPropertyAsAString("type"));
   559       }
   560       if (bagHasKey(contentElement, "fileSize")) {
   561         enc.setPropertyAsAString("length", contentElement.getPropertyAsAString("fileSize"));
   562       }
   564       this._addToEnclosures(enc);
   565     }
   566   },
   568   _mediagroupToEnclosures: function Entry_mediagroupToEnclosures() {
   569     var group = this.fields.getPropertyAsInterface("mediagroup", Ci.nsIPropertyBag2);
   571     var content = group.getPropertyAsInterface("mediacontent", Ci.nsIArray);
   572     for (var i = 0; i < content.length; ++i) {
   573       var contentElement = content.queryElementAt(i, Ci.nsIWritablePropertyBag2);
   574       // media:content don't require url, but if it's not there, we should
   575       // skip it.
   576       if (!bagHasKey(contentElement, "url"))
   577         continue;
   579       var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
   581       // copy media:content bits over to equivalent enclosure bits
   582       enc.setPropertyAsAString("url", contentElement.getPropertyAsAString("url"));
   583       if (bagHasKey(contentElement, "type")) {
   584         enc.setPropertyAsAString("type", contentElement.getPropertyAsAString("type"));
   585       }
   586       if (bagHasKey(contentElement, "fileSize")) {
   587         enc.setPropertyAsAString("length", contentElement.getPropertyAsAString("fileSize"));
   588       }
   590       this._addToEnclosures(enc);
   591     }
   592   },
   594   // XPCOM stuff
   595   classID: ENTRY_CLASSID,
   596   QueryInterface: XPCOMUtils.generateQI(
   597     [Ci.nsIFeedEntry, Ci.nsIFeedContainer]
   598   )
   599 }
   // Entry reuses Feed's link and text-flattening helpers; the same function
   // objects are installed on both prototypes.
   Entry.prototype._atomLinksToURI =
     Feed.prototype._atomLinksToURI;
   Entry.prototype._resolveURI =
     Feed.prototype._resolveURI;
   Entry.prototype._resetBagMembersToRawText =
     Feed.prototype._resetBagMembersToRawText;
   /**
    * TextConstruct represents an element that could contain (X)HTML.
    * New instances are of type "text" with no language, base or content, and
    * hold a reference to the parser-utils service.
    */
   function TextConstruct() {
     var parserUtils =
       Cc[PARSERUTILS_CONTRACTID].getService(Ci.nsIParserUtils);

     this.lang = null;
     this.base = null;
     this.type = "text";
     this.text = null;
     this.parserUtils = parserUtils;
   }
   615 TextConstruct.prototype = {
   616   plainText: function TC_plainText() {
   617     if (this.type != "text") {
   618       return this.parserUtils.convertToPlainText(stripTags(this.text),
   619         Ci.nsIDocumentEncoder.OutputSelectionOnly |
   620         Ci.nsIDocumentEncoder.OutputAbsoluteLinks,
   621         0);
   622     }
   623     return this.text;
   624   },
   626   createDocumentFragment: function TC_createDocumentFragment(element) {
   627     if (this.type == "text") {
   628       var doc = element.ownerDocument;
   629       var docFragment = doc.createDocumentFragment();
   630       var node = doc.createTextNode(this.text);
   631       docFragment.appendChild(node);
   632       return docFragment;
   633     }
   634     var isXML;
   635     if (this.type == "xhtml")
   636       isXML = true
   637     else if (this.type == "html")
   638       isXML = false;
   639     else
   640       return null;
   642     return this.parserUtils.parseFragment(this.text, 0, isXML,
   643                                           this.base, element);
   644   },
   646   // XPCOM stuff
   647   classID: TEXTCONSTRUCT_CLASSID,
   648   QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeedTextConstruct])
   649 }
   /**
    * Generator represents the software that produced the feed.
    * Every member starts out null; _attributes and baseURI back the
    * nsIFeedElementBase part of the object.
    */
   function Generator() {
     var members = ["lang", "agent", "version", "uri",
                    // nsIFeedElementBase
                    "_attributes", "baseURI"];
     for (var i = 0; i < members.length; ++i) {
       this[members[i]] = null;
     }
   }
   Generator.prototype = {

     // The stored attribute set.
     get attributes() {
       return this._attributes;
     },

     // Storing the attributes also derives version and uri from them. The
     // uri comes from the "uri" attribute, falling back to "url". An
     // rdf:resource attribute (RSS1), when present, overrides the uri and
     // becomes the agent.
     set attributes(value) {
       this._attributes = value;

       this.version = value.getValueFromName("", "version");
       var spec = value.getValueFromName("", "uri") ||
                  value.getValueFromName("", "url");
       this.uri = strToURI(spec, this.baseURI);

       // RSS1
       var resource = value.getValueFromName(RDF_NS, "resource");
       if (resource) {
         this.agent = resource;
         this.uri = strToURI(resource, this.baseURI);
       }
     },

     // XPCOM stuff
     classID: GENERATOR_CLASSID,
     QueryInterface: XPCOMUtils.generateQI(
       [Ci.nsIFeedGenerator, Ci.nsIFeedElementBase]
     )
   };
   /**
    * A feed person. Every member starts out null; attributes and baseURI
    * back the nsIFeedElementBase part of the object.
    */
   function Person() {
     var members = ["name", "uri", "email",
                    // nsIFeedElementBase
                    "attributes", "baseURI"];
     for (var i = 0; i < members.length; ++i) {
       this[members[i]] = null;
     }
   }
   // Person carries no behaviour of its own; the prototype only supplies the
   // XPCOM identity and interface list.
   Person.prototype = {
     // XPCOM stuff
     classID: PERSON_CLASSID,
     QueryInterface: XPCOMUtils.generateQI([
       Ci.nsIFeedPerson,
       Ci.nsIFeedElementBase
     ])
   };
   /**
    * Map a list of fields into properties on a container.
    *
    * For every key, each candidate field is looked up in container.fields in
    * list order; every truthy value found is assigned to container[key], so
    * when several candidates are present the last one in the list wins.
    *
    * @param container An nsIFeedContainer
    * @param fields A list of fields to search for. List members can
    *               be a list, in which case the second member is
    *               transformation function (like parseInt).
    */
   function fieldsToObj(container, fields) {
     for (var key in fields) {
       var candidates = fields[key];
       for (var i = 0; i < candidates.length; ++i) {
         var spec = candidates[i];
         var hasTransform = isArray(spec);
         var fieldName = hasTransform ? spec[0] : spec;

         var raw = null;
         try {
           raw = container.fields.getProperty(fieldName);
         }
         catch(e) {
           // Field not present in the bag; try the next candidate.
         }
         if (!raw) {
           continue;
         }

         container[key] = hasTransform ? spec[1](raw) : raw;
       }
     }
   }
   /**
    * Lower cases an element's localName property
    * @param   element A DOM element.
    *
    * @returns The lower case localName property of the specified element
    */
   function LC(element) {
     var name = element.localName;
     return name.toLowerCase();
   }
   // TODO move these post-processor functions
   /**
    * Post-processes a generator element: the trimmed character data becomes
    * the generator's agent.
    *
    * @param s         The element's character data.
    * @param generator The generator object; queried to nsIFeedGenerator.
    * @returns The same generator object.
    */
   function atomGenerator(s, generator) {
     generator.QueryInterface(Ci.nsIFeedGenerator);
     var agent = s.trim();
     generator.agent = agent;
     return generator;
   }
   /**
    * Post-process atom:logo to create an RSS2-like structure: the trimmed
    * character data is stored as the bag's "url". Nothing is returned.
    *
    * @param s    The element's character data.
    * @param logo The property bag to fill in.
    */
   function atomLogo(s, logo) {
     var url = s.trim();
     logo.setPropertyAsAString("url", url);
   }
   /**
    * Post-process an RSS category, map it to the Atom fields: the trimmed
    * character data is stored as the bag's "term".
    *
    * @param s   The element's character data.
    * @param cat The category property bag.
    * @returns The same bag.
    */
   function rssCatTerm(s, cat) {
     // add slash handling?
     var term = s.trim();
     cat.setPropertyAsAString("term", term);
     return cat;
   }
   /**
    * Post-process a GUID: the trimmed character data is stored as the bag's
    * "guid".
    *
    * @param s    The element's character data.
    * @param guid The guid property bag.
    * @returns The same bag.
    */
   function rssGuid(s, guid) {
     var id = s.trim();
     guid.setPropertyAsAString("guid", id);
     return guid;
   }
   // post-process an RSS author element
   //
   // It can contain a field like this:
   //
   //  <author>lawyer@boyer.net (Lawyer Boyer)</author>
   //
   // or, delightfully, a field like this:
   //
   //  <dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
   //
   // We want to split this up and assign it to corresponding Atom
   // fields.
   //
   /**
    * @param s      The element's character data.
    * @param author The person object to fill in; queried to nsIFeedPerson.
    * @returns The same author object, with name and (when one could be
    *          identified) email set.
    */
   function rssAuthor(s,author) {
     author.QueryInterface(Ci.nsIFeedPerson);
     // check for RSS2 string format
     var chars = s.trim();
     var matches = chars.match(/(.*)\((.*)\)/);
     // Accepts the same strings as the former pattern
     //   /^([a-zA-Z0-9_\.\-])+\@(([a-zA-Z0-9\-])+\.)+([a-zA-Z0-9]{2,4})+$/
     // but without a quantified group around an already-quantified token:
     // "([a-zA-Z0-9]{2,4})+" matches exactly the alphanumeric runs of length
     // two or more, yet can backtrack heavily on long feed-supplied input.
     var emailCheck =
       /^[a-zA-Z0-9_.\-]+@(?:[a-zA-Z0-9\-]+\.)+[a-zA-Z0-9]{2,}$/;
     if (matches) {
       var match1 = matches[1].trim();
       var match2 = matches[2].trim();
       if (match2.indexOf("mailto:") == 0)
         match2 = match2.substring(7);
       if (emailCheck.test(match1)) {
         author.email = match1;
         author.name = match2;
       }
       else if (emailCheck.test(match2)) {
         author.email = match2;
         author.name = match1;
       }
       else {
         // put it back together
         author.name = match1 + " (" + match2 + ")";
       }
     }
     else {
       author.name = chars;
       // indexOf() returns -1 (which is truthy) when there is no "@", so the
       // result has to be compared explicitly; a bare truthiness test set the
       // email for every name WITHOUT an "@" and skipped a leading "@".
       if (chars.indexOf('@') != -1)
         author.email = chars;
     }
     return author;
   }
   /**
    * skipHours and skipDays map to arrays, so we need to change the
    * string to an nsISupports in order to stick it in there.
    *
    * @param s The string to wrap.
    * @returns A new supports-string instance whose data is |s|.
    */
   function rssArrayElement(s) {
     var factory = Cc["@mozilla.org/supports-string;1"];
     var wrapped = factory.createInstance(Ci.nsISupportsString);
     wrapped.data = s;
     wrapped.QueryInterface(Ci.nsISupportsString);
     return wrapped;
   }
   /**
    * Tries parsing a string through the JavaScript Date object.
    * @param aDateString
    *        A string that is supposedly an RFC822 or RFC3339 date.
    * @return A Date.toUTCString, or null if the string can't be parsed.
    */
   function dateParse(aDateString) {
     // Without bug 682781 fixed, JS won't parse an RFC822 date with a Z for the
     // timezone, so convert to -00:00 which works for any date format.
     let normalized = aDateString.trim().replace(/z$/i, "-00:00");
     let parsed = new Date(normalized);
     return isNaN(parsed) ? null : parsed.toUTCString();
   }
   850 const XHTML_NS = "http://www.w3.org/1999/xhtml";
   /**
    * The XHTMLHandler handles inline XHTML found in things like atom:summary.
    *
    * @param processor Stored as-is in _processor.
    * @param isAtom    Stored as-is in _isAtom.
    */
   function XHTMLHandler(processor, isAtom) {
     // Serialized markup collected so far, and the current element depth.
     this._buf = "";
     this._processor = processor;
     this._depth = 0;
     this._isAtom = isAtom;
     // a stack of lists tracking in-scope namespaces
     this._inScopeNS = new Array();
   }
   862 // The fidelity can be improved here, to allow handling of stuff like
   863 // SVG and MathML. XXX
   864 XHTMLHandler.prototype = {
   866    // look back up at the declared namespaces 
   867    // we always use the same prefixes for our safe stuff
   868   _isInScope: function XH__isInScope(ns) {
   869     for (var i in this._inScopeNS) {
   870       for (var uri in this._inScopeNS[i]) {
   871         if (this._inScopeNS[i][uri] == ns)
   872           return true;
   873       }
   874     }
   875     return false;
   876   },
   878   startDocument: function XH_startDocument() {
   879   },
   880   endDocument: function XH_endDocument() {
   881   },
   882   startElement: function XH_startElement(uri, localName, qName, attributes) {
   883     ++this._depth;
   884     this._inScopeNS.push([]);
   886     // RFC4287 requires XHTML to be wrapped in a div that is *not* part of 
   887     // the content. This prevents people from screwing up namespaces, but
   888     // we need to skip it here.
   889     if (this._isAtom && this._depth == 1 && localName == "div")
   890       return;
   892     // If it's an XHTML element, record it. Otherwise, it's ignored.
   893     if (uri == XHTML_NS) {
   894       this._buf += "<" + localName;
   895       var uri;
   896       for (var i=0; i < attributes.length; ++i) {
   897         uri = attributes.getURI(i);
   898         // XHTML attributes aren't in a namespace
   899         if (uri == "") { 
   900           this._buf += (" " + attributes.getLocalName(i) + "='" +
   901                         xmlEscape(attributes.getValue(i)) + "'");
   902         } else {
   903           // write a small set of allowed attribute namespaces
   904           var prefix = gAllowedXHTMLNamespaces[uri];
   905           if (prefix != null) {
   906             // The attribute value we'll attempt to write
   907             var attributeValue = xmlEscape(attributes.getValue(i));
   909             // it's an allowed attribute NS.            
   910             // write the attribute
   911             this._buf += (" " + prefix + ":" + 
   912                           attributes.getLocalName(i) + 
   913                           "='" + attributeValue + "'");
   915             // write an xmlns declaration if necessary
   916             if (prefix != "xml" && !this._isInScope(uri)) {
   917               this._inScopeNS[this._inScopeNS.length - 1].push(uri);
   918               this._buf += " xmlns:" + prefix + "='" + uri + "'";
   919             }
   920           }
   921         }
   922       }
   923       this._buf += ">";
   924     }
   925   },
   926   endElement: function XH_endElement(uri, localName, qName) {
   927     --this._depth;
   928     this._inScopeNS.pop();
   930     // We need to skip outer divs in Atom. See comment in startElement.
   931     if (this._isAtom && this._depth == 0 && localName == "div")
   932       return;
   934     // When we peek too far, go back to the main processor
   935     if (this._depth < 0) {
   936       this._processor.returnFromXHTMLHandler(this._buf.trim(),
   937                                              uri, localName, qName);
   938       return;
   939     }
   940     // If it's an XHTML element, record it. Otherwise, it's ignored.
   941     if (uri == XHTML_NS) {
   942       this._buf += "</" + localName + ">";
   943     }
   944   },
   945   characters: function XH_characters(data) {
   946     this._buf += xmlEscape(data);
   947   },
   948   startPrefixMapping: function XH_startPrefixMapping(prefix, uri) {
   949   },
   950   endPrefixMapping: function FP_endPrefixMapping(prefix) {
   951   },
   952   processingInstruction: function XH_processingInstruction() {
   953   }, 
   954 }
   /**
    * The ExtensionHandler deals with elements we haven't explicitly
    * added to our transition table in the FeedProcessor.
    *
    * It remembers the outermost element it is given (namespace URI, local
    * name, qualified name and attributes), collects that element's text, and
    * reports back through processor.returnFromExtHandler() when the outermost
    * element closes. If the element turns out to have child elements, null is
    * reported instead of text.
    *
    * @param processor
    *        Object whose returnFromExtHandler(uri, localName, text, attrs)
    *        receives the result.
    */
   function ExtensionHandler(processor) {
     this._buf = "";
     this._depth = 0;
     this._hasChildElements = false;

     // The FeedProcessor
     this._processor = processor;

     // Fields of the outermost extension element.
     this._localName = null;
     this._uri = null;
     this._qName = null;
     this._attrs = null;
   }

   ExtensionHandler.prototype = {
     startDocument: function EH_startDocument() {
     },
     endDocument: function EH_endDocument() {
     },
     startElement: function EH_startElement(uri, localName, qName, attrs) {
       ++this._depth;

       // Only the outermost element is remembered.
       if (this._depth == 1) {
         this._uri = uri;
         this._localName = localName;
         this._qName = qName;
         this._attrs = attrs;
       }

       // if we descend into another element, we won't send text
       this._hasChildElements = (this._depth > 1);
     },
     endElement: function EH_endElement(uri, localName, qName) {
       --this._depth;
       // Back at depth 0 means the outermost element just closed.
       if (this._depth == 0) {
         var text = this._hasChildElements ? null : this._buf.trim();
         this._processor.returnFromExtHandler(this._uri, this._localName,
                                              text, this._attrs);
       }
     },
     characters: function EH_characters(data) {
       if (!this._hasChildElements)
         this._buf += data;
     },
     startPrefixMapping: function EH_startPrefixMapping() {
     },
     endPrefixMapping: function EH_endPrefixMapping() {
     },
     processingInstruction: function EH_processingInstruction() {
     },
   };
  /**
   * An ElementInfo is a plain record telling the processor how to treat one
   * kind of feed element, e.g. whether it may show up more than once inside
   * a given entry or feed.
   *
   * @param fieldName
   *        Name of the property the element is stored under in its
   *        container.
   * @param containerClass
   *        XPCOM class used to create the element's object, or null.
   * @param closeFunc
   *        Optional post-processing function run when the element closes.
   * @param isArray
   *        True when occurrences of the element are collected in an array.
   */
  function ElementInfo(fieldName, containerClass, closeFunc, isArray) {
    this.fieldName = fieldName;
    this.containerClass = containerClass;
    this.closeFunc = closeFunc;
    this.isArray = isArray;
    // Elements described by an ElementInfo are never mere wrappers.
    this.isWrapper = false;
  }
  /**
   * A FeedElementInfo describes an element that identifies the feed itself,
   * usually the root.
   *
   * @param fieldName
   *        Name used to derive the parser state entered for this element.
   * @param feedVersion
   *        Version string recorded for feeds identified by this element.
   */
  function FeedElementInfo(fieldName, feedVersion) {
    // A feed element is real content, not a wrapper.
    this.isWrapper = false;
    this.fieldName = fieldName;
    this.feedVersion = feedVersion;
  }
  /**
   * Describes a vestigial wrapper element: something a feed format requires
   * around the real content, which has to be tracked while parsing but is
   * kept out of the object model.
   *
   * @param fieldName
   *        Name used to derive the parser state entered for this element.
   */
  function WrapperElementInfo(fieldName) {
    // This flag is what sets wrappers apart from the other info objects.
    this.isWrapper = true;
    this.fieldName = fieldName;
  }
  1050 /***** The Processor *****/
       // Constructor: creates the SAX reader, resets all per-parse bookkeeping
       // and builds the state transition table (this._trans) that startElement
       // consults for every element it sees.
  1051 function FeedProcessor() {
  1052   this._reader = Cc[SAX_CONTRACTID].createInstance(Ci.nsISAXXMLReader);
  1053   this._buf =  "";
  1054   this._feed = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
         // info object of each open, recognized element, indexed by depth
  1055   this._handlerStack = [];
  1056   this._xmlBaseStack = []; // sparse array keyed to nesting depth
  1057   this._depth = 0;
         // name of the row of this._trans that is currently active
  1058   this._state = "START";
  1059   this._result = null;
  1060   this._extensionHandler = null;
  1061   this._xhtmlHandler = null;
  1062   this._haveSentResult = false;
  1064   // The nsIFeedResultListener waiting for the parse results
  1065   this.listener = null;
  1067   // These elements can contain (X)HTML or plain text.
  1068   // We keep a table here that contains their default treatment
  1069   this._textConstructs = {"atom:title":"text",
  1070                           "atom:summary":"text",
  1071                           "atom:rights":"text",
  1072                           "atom:content":"text",
  1073                           "atom:subtitle":"text",
  1074                           "description":"html",
  1075                           "rss1:description":"html",
  1076                           "dc:description":"html",
  1077                           "content:encoded":"html",
  1078                           "title":"text",
  1079                           "rss1:title":"text",
  1080                           "atom03:title":"text",
  1081                           "atom03:tagline":"text",
  1082                           "atom03:summary":"text",
  1083                           "atom03:content":"text"};
         // [container, state name(, object)] tuples for the open containers;
         // the top entry decides where new properties are attached
  1084   this._stack = [];
         // State table: state name -> element key ("prefix:localName") -> info
         // object. The state entered for an element is "IN_" followed by the
         // upper-cased fieldName of its info object, which is how the row names
         // below line up with the fieldName arguments.
  1086   this._trans = {   
  1087     "START": {
  1088       //If we hit a root RSS element, treat as RSS2.
  1089       "rss": new FeedElementInfo("RSS2", "rss2"),
  1091       // If we hit an RDF element, it could be RSS1, but we can't
  1092       // verify that until we hit a rss1:channel element.
  1093       "rdf:RDF": new WrapperElementInfo("RDF"),
  1095       // If we hit a Atom 1.0 element, treat as Atom 1.0.
  1096       "atom:feed": new FeedElementInfo("Atom", "atom"),
  1098       // Treat as Atom 0.3
  1099       "atom03:feed": new FeedElementInfo("Atom03", "atom03"),
  1100     },
  1102     /********* RSS2 **********/
  1103     "IN_RSS2": {
  1104       "channel": new WrapperElementInfo("channel")
  1105     },
  1107     "IN_CHANNEL": {
  1108       "item": new ElementInfo("items", Cc[ENTRY_CONTRACTID], null, true),
  1109       "managingEditor": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1110                                         rssAuthor, true),
  1111       "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1112                                     rssAuthor, true),
  1113       "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1114                                    rssAuthor, true),
  1115       "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
  1116                                          rssAuthor, true),
  1117       "category": new ElementInfo("categories", null, rssCatTerm, true),
  1118       "cloud": new ElementInfo("cloud", null, null, false),
  1119       "image": new ElementInfo("image", null, null, false),
  1120       "textInput": new ElementInfo("textInput", null, null, false),
  1121       "skipDays": new ElementInfo("skipDays", null, null, false),
  1122       "skipHours": new ElementInfo("skipHours", null, null, false),
  1123       "generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID],
  1124                                    atomGenerator, false),
  1125     },
  1127     "IN_ITEMS": {
  1128       "author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1129                                 rssAuthor, true),
  1130       "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1131                                     rssAuthor, true),
  1132       "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1133                                    rssAuthor, true),
  1134       "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
  1135                                          rssAuthor, true),
  1136       "category": new ElementInfo("categories", null, rssCatTerm, true),
  1137       "enclosure": new ElementInfo("enclosure", null, null, false),
  1138       "media:content": new ElementInfo("mediacontent", null, null, true),
  1139       "media:group": new ElementInfo("mediagroup", null, null, false),
  1140       "guid": new ElementInfo("guid", null, rssGuid, false)
  1141     },
  1143     "IN_SKIPDAYS": {
  1144       "day": new ElementInfo("days", null, rssArrayElement, true)
  1145     },
  1147     "IN_SKIPHOURS":{
  1148       "hour": new ElementInfo("hours", null, rssArrayElement, true)
  1149     },
  1151     "IN_MEDIAGROUP": {
  1152       "media:content": new ElementInfo("mediacontent", null, null, true)
  1153     },
  1155     /********* RSS1 **********/
  1156     "IN_RDF": {
  1157       // If we hit a rss1:channel, we can verify that we have RSS1
  1158       "rss1:channel": new FeedElementInfo("rdf_channel", "rss1"),
  1159       "rss1:image": new ElementInfo("image", null, null, false),
  1160       "rss1:textinput": new ElementInfo("textInput", null, null, false),
  1161       "rss1:item": new ElementInfo("items", Cc[ENTRY_CONTRACTID], null, true),
  1162     },
  1164     "IN_RDF_CHANNEL": {
  1165       "admin:generatorAgent": new ElementInfo("generator",
  1166                                               Cc[GENERATOR_CONTRACTID],
  1167                                               null, false),
  1168       "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1169                                     rssAuthor, true),
  1170       "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1171                                    rssAuthor, true),
  1172       "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
  1173                                          rssAuthor, true),
  1174     },
  1176     /********* ATOM 1.0 **********/
  1177     "IN_ATOM": {
  1178       "atom:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1179                                      null, true),
  1180       "atom:generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID],
  1181                                         atomGenerator, false),
  1182       "atom:contributor": new ElementInfo("contributors",  Cc[PERSON_CONTRACTID],
  1183                                           null, true),
  1184       "atom:link": new ElementInfo("links", null, null, true),
  1185       "atom:logo": new ElementInfo("atom:logo", null, atomLogo, false),
  1186       "atom:entry": new ElementInfo("entries", Cc[ENTRY_CONTRACTID],
  1187                                     null, true)
  1188     },
  1190     "IN_ENTRIES": {
  1191       "atom:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1192                                      null, true),
  1193       "atom:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID],
  1194                                           null, true),
  1195       "atom:link": new ElementInfo("links", null, null, true),
  1196     },
  1198     /********* ATOM 0.3 **********/
  1199     "IN_ATOM03": {
  1200       "atom03:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1201                                        null, true),
  1202       "atom03:contributor": new ElementInfo("contributors",
  1203                                             Cc[PERSON_CONTRACTID],
  1204                                             null, true),
  1205       "atom03:link": new ElementInfo("links", null, null, true),
  1206       "atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID],
  1207                                       null, true),
  1208       "atom03:generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID],
  1209                                           atomGenerator, false),
  1210     },
             // NOTE(review): this row lists atom03:entry again, i.e. an entry
             // nested inside an entry - confirm that is intended.
  1212     "IN_ATOM03_ENTRIES": {
  1213       "atom03:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID],
  1214                                        null, true),
  1215       "atom03:contributor": new ElementInfo("contributors",
  1216                                             Cc[PERSON_CONTRACTID],
  1217                                             null, true),
  1218       "atom03:link": new ElementInfo("links", null, null, true),
  1219       "atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID],
  1220                                       null, true)
  1225 // See startElement for a long description of how feeds are processed.
  1226 FeedProcessor.prototype = { 
  1228   // Set ourselves as the SAX handler, and set the base URI
  1229   _init: function FP_init(uri) {
  1230     this._reader.contentHandler = this;
  1231     this._reader.errorHandler = this;
  1232     this._result = Cc[FR_CONTRACTID].createInstance(Ci.nsIFeedResult);
  1233     if (uri) {
  1234       this._result.uri = uri;
  1235       this._reader.baseURI = uri;
  1236       this._xmlBaseStack[0] = uri;
  1238   },
  1240   // This function is called once we figure out what type of feed
  1241   // we're dealing with. Some feed types require digging a bit further
  1242   // than the root.
  1243   _docVerified: function FP_docVerified(version) {
  1244     this._result.doc = Cc[FEED_CONTRACTID].createInstance(Ci.nsIFeed);
  1245     this._result.doc.baseURI = 
  1246       this._xmlBaseStack[this._xmlBaseStack.length - 1];
  1247     this._result.doc.fields = this._feed;
  1248     this._result.version = version;
  1249   },
  1251   // When we're done with the feed, let the listener know what
  1252   // happened.
  1253   _sendResult: function FP_sendResult() {
  1254     this._haveSentResult = true;
  1255     try {
  1256       // Can be null when a non-feed is fed to us
  1257       if (this._result.doc)
  1258         this._result.doc.normalize();
  1260     catch (e) {
  1261       LOG("FIXME: " + e);
  1264     try {
  1265       if (this.listener != null)
  1266         this.listener.handleResult(this._result);
  1268     finally {
  1269       this._result = null;
  1271   },
  1273   // Parsing functions
  1274   parseFromStream: function FP_parseFromStream(stream, uri) {
  1275     this._init(uri);
  1276     this._reader.parseFromStream(stream, null, stream.available(), 
  1277                                  "application/xml");
  1278     this._reader = null;
  1279   },
  1281   parseFromString: function FP_parseFromString(inputString, uri) {
  1282     this._init(uri);
  1283     this._reader.parseFromString(inputString, "application/xml");
  1284     this._reader = null;
  1285   },
  1287   parseAsync: function FP_parseAsync(requestObserver, uri) {
  1288     this._init(uri);
  1289     this._reader.parseAsync(requestObserver);
  1290   },
  1292   // nsIStreamListener 
  1294   // The XMLReader will throw sensible exceptions if these get called
  1295   // out of order.
  1296   onStartRequest: function FP_onStartRequest(request, context) {
  1297     // this will throw if the request is not a channel, but so will nsParser.
  1298     var channel = request.QueryInterface(Ci.nsIChannel);
  1299     channel.contentType = "application/vnd.mozilla.maybe.feed";
  1300     this._reader.onStartRequest(request, context);
  1301   },
  1303   onStopRequest: function FP_onStopRequest(request, context, statusCode) {
  1304     try {
  1305       this._reader.onStopRequest(request, context, statusCode);
  1307     finally {
  1308       this._reader = null;
  1310   },
  1312   onDataAvailable:
  1313   function FP_onDataAvailable(request, context, inputStream, offset, count) {
  1314     this._reader.onDataAvailable(request, context, inputStream, offset, count);
  1315   },
  1317   // nsISAXErrorHandler
  1319   // We only care about fatal errors. When this happens, we may have
  1320   // parsed through the feed metadata and some number of entries. The
  1321   // listener can still show some of that data if it wants, and we'll
  1322   // set the bozo bit to indicate we were unable to parse all the way
  1323   // through.
  1324   fatalError: function FP_reportError() {
  1325     this._result.bozo = true;
  1326     //XXX need to QI to FeedProgressListener
  1327     if (!this._haveSentResult)
  1328       this._sendResult();
  1329   },
  1331   // nsISAXContentHandler
  1333   startDocument: function FP_startDocument() {
  1334     //LOG("----------");
  1335   },
  1337   endDocument: function FP_endDocument() {
  1338     if (!this._haveSentResult)
  1339       this._sendResult();
  1340   },
  1342   // The transitions defined above identify elements that contain more
  1343   // than just text. For example RSS items contain many fields, and so
  1344   // do Atom authors. The only commonly used elements that contain
  1345   // mixed content are Atom Text Constructs of type="xhtml", which we
  1346   // delegate to another handler for cleaning. That leaves a couple
  1347   // different types of elements to deal with: those that should occur
  1348   // only once, such as title elements, and those that can occur
  1349   // multiple times, such as the RSS category element and the Atom
  1350   // link element. Most of the RSS1/DC elements can occur multiple
  1351   // times in theory, but in practice, the only ones that do have
  1352   // analogues in Atom. 
  1353   //
  1354   // Some elements are also groups of attributes or sub-elements,
  1355   // while others are simple text fields. For the most part, we don't
  1356   // have to pay explicit attention to the simple text elements,
  1357   // unless we want to post-process the resulting string to transform
  1358   // it into some richer object like a Date or URI.
  1359   //
  1360   // Elements that have more sophisticated content models still end up
  1361   // being dictionaries, whether they are based on attributes like RSS
  1362   // cloud, sub-elements like Atom author, or even items and
  1363   // entries. These elements are treated as "containers". It's
  1364   // theoretically possible for a container to have an attribute with 
  1365   // the same universal name as a sub-element, but none of the feed
  1366   // formats allow this by default, and I don't know of any extension
  1367   // that works this way.
  1368   //
  1369   startElement: function FP_startElement(uri, localName, qName, attributes) {
  1370     this._buf = "";
  1371     ++this._depth;
  1372     var elementInfo;
  1374     //LOG("<" + localName + ">");
  1376     // Check for xml:base
  1377     var base = attributes.getValueFromName(XMLNS, "base");
  1378     if (base) {
             // Resolved against the innermost base currently in effect, i.e.
             // the last slot of the sparse stack.
  1379       this._xmlBaseStack[this._depth] =
  1380         strToURI(base, this._xmlBaseStack[this._xmlBaseStack.length - 1]);
  1383     // To identify the element we're dealing with, we look up the
  1384     // namespace URI in our gNamespaces dictionary, which will give us
  1385     // a "canonical" prefix for a namespace URI. For example, this
  1386     // allows Dublin Core "creator" elements to be consistently mapped
  1387     // to "dc:creator", for easy field access by consumer code. This
  1388     // strategy also happens to shorten up our state table.
           // Note that _prefixForNS returns null for a namespace it does not
           // recognize, so the key of such an element starts with "null".
  1389     var key =  this._prefixForNS(uri) + localName;
  1391     // Check to see if we need to hand this off to our XHTML handler.
  1392     // The elements we're dealing with will look like this:
  1393     // 
  1394     // <title type="xhtml">
  1395     //   <div xmlns="http://www.w3.org/1999/xhtml">
  1396     //     A title with <b>bold</b> and <i>italics</i>.
  1397     //   </div>
  1398     // </title>
  1399     //
  1400     // When it returns in returnFromXHTMLHandler, the handler should
  1401     // give us back a string like this: 
  1402     // 
  1403     //    "A title with <b>bold</b> and <i>italics</i>."
  1404     //
  1405     // The Atom spec explicitly says the div is not part of the content,
  1406     // and explicitly allows whitespace collapsing.
  1407     // 
  1408     if ((this._result.version == "atom" || this._result.version == "atom03") &&
  1409         this._textConstructs[key] != null) {
  1410       var type = attributes.getValueFromName("","type");
  1411       if (type != null && type.indexOf("xhtml") >= 0) {
  1412         this._xhtmlHandler = 
  1413           new XHTMLHandler(this, (this._result.version == "atom"));
  1414         this._reader.contentHandler = this._xhtmlHandler;
               // This start tag is not forwarded, so the first event the XHTML
               // handler receives belongs to the element's content.
  1415         return;
  1419     // Check our current state, and see if that state has a defined
  1420     // transition. For example, this._trans["IN_ENTRIES"]["atom:author"]
  1421     // will have one, and it tells us to add an item to our authors array.
  1422     if (this._trans[this._state] && this._trans[this._state][key]) {
  1423       elementInfo = this._trans[this._state][key];
  1425     else {
  1426       // If we don't have a transition, hand off to extension handler
  1427       this._extensionHandler = new ExtensionHandler(this);
  1428       this._reader.contentHandler = this._extensionHandler;
             // Replay this start tag so the extension handler records it as
             // its outermost element.
  1429       this._extensionHandler.startElement(uri, localName, qName, attributes);
  1430       return;
  1433     // This distinguishes wrappers like 'channel' from elements
  1434     // we'd actually like to do something with (which will test true).
  1435     this._handlerStack[this._depth] = elementInfo; 
  1436     if (elementInfo.isWrapper) {
             // A wrapper only changes state; properties keep going to the feed.
  1437       this._state = "IN_" + elementInfo.fieldName.toUpperCase();
  1438       this._stack.push([this._feed, this._state]);
  1440     else if (elementInfo.feedVersion) {
  1441       this._state = "IN_" + elementInfo.fieldName.toUpperCase();
  1443       // Check for the older RSS2 variants
             // (the refined version is written back onto the table entry)
  1444       if (elementInfo.feedVersion == "rss2")
  1445         elementInfo.feedVersion = this._findRSSVersion(attributes);
  1446       else if (uri == RSS090NS)
  1447         elementInfo.feedVersion = "rss090";
  1449       this._docVerified(elementInfo.feedVersion);
  1450       this._stack.push([this._feed, this._state]);
  1451       this._mapAttributes(this._feed, attributes);
  1453     else {
             // Everything else is a container; it pushes its own stack entry.
  1454       this._state = this._processComplexElement(elementInfo, attributes);
  1456   },
  1458   // In the endElement handler, we decrement the stack and look
  1459   // for cleanup/transition functions to execute. The second part
  1460   // of the state transition works as above in startElement, but
  1461   // the state we're looking for is prefixed with an underscore
  1462   // to distinguish endElement events from startElement events.
  1463   endElement:  function FP_endElement(uri, localName, qName) {
  1464     var elementInfo = this._handlerStack[this._depth];
  1465     //LOG("</" + localName + ">");
  1466     if (elementInfo && !elementInfo.isWrapper)
  1467       this._closeComplexElement(elementInfo);
  1469     // cut down xml:base context
  1470     if (this._xmlBaseStack.length == this._depth + 1)
  1471       this._xmlBaseStack = this._xmlBaseStack.slice(0, this._depth);
  1473     // our new state is whatever is at the top of the stack now
  1474     if (this._stack.length > 0)
  1475       this._state = this._stack[this._stack.length - 1][1];
  1476     this._handlerStack = this._handlerStack.slice(0, this._depth);
  1477     --this._depth;
  1478   },
  1480   // Buffer up character data. The buffer is cleared with every
  1481   // opening element.
  1482   characters: function FP_characters(data) {
  1483     this._buf += data;
  1484   },
  1485   // TODO: It would be nice to check new prefixes here, and if they
  1486   // don't conflict with the ones we've defined, throw them in a 
  1487   // dictionary to check.
  1488   startPrefixMapping: function FP_startPrefixMapping(prefix, uri) {
  1489   },
  1491   endPrefixMapping: function FP_endPrefixMapping(prefix) {
  1492   },
  1494   processingInstruction: function FP_processingInstruction(target, data) {
  1495     if (target == "xml-stylesheet") {
  1496       var hrefAttribute = data.match(/href=[\"\'](.*?)[\"\']/);
  1497       if (hrefAttribute && hrefAttribute.length == 2) 
  1498         this._result.stylesheet = strToURI(hrefAttribute[1], this._result.uri);
  1500   },
  1502   // end of nsISAXContentHandler
  1504   // Handle our more complicated elements--those that contain
  1505   // attributes and child elements.
  1506   _processComplexElement:
  1507   function FP__processComplexElement(elementInfo, attributes) {
  1508     var obj, key, prefix;
  1510     // If the container is an entry/item, it'll need to have its 
  1511     // more esoteric properties put in the 'fields' property bag.
  1512     if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID]) {
  1513       obj = elementInfo.containerClass.createInstance(Ci.nsIFeedEntry);
  1514       obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1];
  1515       this._mapAttributes(obj.fields, attributes);
  1517     else if (elementInfo.containerClass) {
  1518       obj = elementInfo.containerClass.createInstance(Ci.nsIFeedElementBase);
  1519       obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1];
  1520       obj.attributes = attributes; // just set the SAX attributes
  1522     else {
  1523       obj = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
  1524       this._mapAttributes(obj, attributes);
  1527     // We should have a container/propertyBag that's had its
  1528     // attributes processed. Now we need to attach it to its
  1529     // container.
  1530     var newProp;
  1532     // First we'll see what's on top of the stack.
  1533     var container = this._stack[this._stack.length - 1][0];
  1535     // Check to see if it has the property
  1536     var prop;
  1537     try {
  1538       prop = container.getProperty(elementInfo.fieldName);
  1540     catch(e) {
  1543     if (elementInfo.isArray) {
  1544       if (!prop) {
  1545         container.setPropertyAsInterface(elementInfo.fieldName,
  1546                                          Cc[ARRAY_CONTRACTID].
  1547                                          createInstance(Ci.nsIMutableArray));
  1550       newProp = container.getProperty(elementInfo.fieldName);
  1551       // XXX This QI should not be necessary, but XPConnect seems to fly
  1552       // off the handle in the browser, and loses track of the interface
  1553       // on large files. Bug 335638.
  1554       newProp.QueryInterface(Ci.nsIMutableArray);
  1555       newProp.appendElement(obj,false);
  1557       // If new object is an nsIFeedContainer, we want to deal with
  1558       // its member nsIPropertyBag instead.
  1559       if (isIFeedContainer(obj))
  1560         newProp = obj.fields; 
  1563     else {
  1564       // If it doesn't, set it.
  1565       if (!prop) {
  1566         container.setPropertyAsInterface(elementInfo.fieldName,obj);
  1568       newProp = container.getProperty(elementInfo.fieldName);
  1571     // make our new state name, and push the property onto the stack
  1572     var newState = "IN_" + elementInfo.fieldName.toUpperCase();
  1573     this._stack.push([newProp, newState, obj]);
  1574     return newState;
  1575   },
  1577   // Sometimes we need reconcile the element content with the object
  1578   // model for a given feed. We use helper functions to do the
  1579   // munging, but we need to identify array types here, so the munging
  1580   // happens only to the last element of an array.
  1581   _closeComplexElement: function FP__closeComplexElement(elementInfo) {
  1582     var stateTuple = this._stack.pop();
  1583     var container = stateTuple[0];
  1584     var containerParent = stateTuple[2];
  1585     var element = null;
  1586     var isArray = isIArray(container);
  1588     // If it's an array and we have to post-process,
  1589     // grab the last element
  1590     if (isArray)
  1591       element = container.queryElementAt(container.length - 1, Ci.nsISupports);
  1592     else
  1593       element = container;
  1595     // Run the post-processing function if there is one.
  1596     if (elementInfo.closeFunc)
  1597       element = elementInfo.closeFunc(this._buf, element);
  1599     // If an nsIFeedContainer was on top of the stack,
  1600     // we need to normalize it
  1601     if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID])
  1602       containerParent.normalize();
  1604     // If it's an array, re-set the last element
  1605     if (isArray)
  1606       container.replaceElementAt(element, container.length - 1, false);
  1607   },
  1609   _prefixForNS: function FP_prefixForNS(uri) {
  1610     if (!uri)
  1611       return "";
  1612     var prefix = gNamespaces[uri];
  1613     if (prefix)
  1614       return prefix + ":";
  1615     if (uri.toLowerCase().indexOf("http://backend.userland.com") == 0)
  1616       return "";
  1617     else
  1618       return null;
  1619   },
  1621   _mapAttributes: function FP__mapAttributes(bag, attributes) {
  1622     // Cycle through the attributes, and set our properties using the
  1623     // prefix:localNames we find in our namespace dictionary.
  1624     for (var i = 0; i < attributes.length; ++i) {
  1625       var key = this._prefixForNS(attributes.getURI(i)) + attributes.getLocalName(i);
  1626       var val = attributes.getValue(i);
  1627       bag.setPropertyAsAString(key, val);
  1629   },
  1631   // Only for RSS2esque formats
  1632   _findRSSVersion: function FP__findRSSVersion(attributes) {
  1633     var versionAttr = attributes.getValueFromName("", "version").trim();
  1634     var versions = { "0.91":"rss091",
  1635                      "0.92":"rss092",
  1636                      "0.93":"rss093",
  1637                      "0.94":"rss094" }
  1638     if (versions[versionAttr])
  1639       return versions[versionAttr];
  1640     if (versionAttr.substr(0,2) != "2.")
  1641       return "rssUnknown";
  1642     return "rss2";
  1643   },
  1645   // unknown element values are returned here. See startElement above
  1646   // for how this works.
  1647   returnFromExtHandler:
  1648   function FP_returnExt(uri, localName, chars, attributes) {
  1649     --this._depth;
  1651     // take control of the SAX events
  1652     this._reader.contentHandler = this;
  1653     if (localName == null && chars == null)
  1654       return;
  1656     // we don't take random elements inside rdf:RDF
  1657     if (this._state == "IN_RDF")
  1658       return;
  1660     // Grab the top of the stack
  1661     var top = this._stack[this._stack.length - 1];
  1662     if (!top) 
  1663       return;
  1665     var container = top[0];
  1666     // Grab the last element if it's an array
  1667     if (isIArray(container)) {
  1668       var contract = this._handlerStack[this._depth].containerClass;
  1669       // check if it's something specific, but not an entry
  1670       if (contract && contract != Cc[ENTRY_CONTRACTID]) {
  1671         var el = container.queryElementAt(container.length - 1, 
  1672                                           Ci.nsIFeedElementBase);
  1673         // XXX there must be a way to flatten these interfaces
  1674         if (contract == Cc[PERSON_CONTRACTID]) 
  1675           el.QueryInterface(Ci.nsIFeedPerson);
  1676         else
  1677           return; // don't know about this interface
  1679         var propName = localName;
  1680         var prefix = gNamespaces[uri];
  1682         // synonyms
  1683         if ((uri == "" || 
  1684              prefix &&
  1685              ((prefix.indexOf("atom") > -1) ||
  1686               (prefix.indexOf("rss") > -1))) && 
  1687             (propName == "url" || propName == "href"))
  1688           propName = "uri";
  1690         try {
  1691           if (el[propName] !== "undefined") {
  1692             var propValue = chars;
  1693             // convert URI-bearing values to an nsIURI
  1694             if (propName == "uri") {
  1695               var base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
  1696               propValue = strToURI(chars, base);
  1698             el[propName] = propValue;
  1701         catch(e) {
  1702           // ignore XPConnect errors
  1704         // the rest of the function deals with entry- and feed-level stuff
  1705         return; 
  1707       else {
  1708         container = container.queryElementAt(container.length - 1, 
  1709                                              Ci.nsIWritablePropertyBag2);
  1713     // Make the buffer our new property
  1714     var propName = this._prefixForNS(uri) + localName;
  1716     // But, it could be something containing HTML. If so,
  1717     // we need to know about that.
  1718     if (this._textConstructs[propName] != null &&
  1719         this._handlerStack[this._depth].containerClass !== null) {
  1720       var newProp = Cc[TEXTCONSTRUCT_CONTRACTID].
  1721                     createInstance(Ci.nsIFeedTextConstruct);
  1722       newProp.text = chars;
  1723       // Look up the default type in our table
  1724       var type = this._textConstructs[propName];
  1725       var typeAttribute = attributes.getValueFromName("","type");
  1726       if (this._result.version == "atom" && typeAttribute != null) {
  1727         type = typeAttribute;
  1729       else if (this._result.version == "atom03" && typeAttribute != null) {
  1730         if (typeAttribute.toLowerCase().indexOf("xhtml") >= 0) {
  1731           type = "xhtml";
  1733         else if (typeAttribute.toLowerCase().indexOf("html") >= 0) {
  1734           type = "html";
  1736         else if (typeAttribute.toLowerCase().indexOf("text") >= 0) {
  1737           type = "text";
  1741       // If it's rss feed-level description, it's not supposed to have html
  1742       if (this._result.version.indexOf("rss") >= 0 &&
  1743           this._handlerStack[this._depth].containerClass != ENTRY_CONTRACTID) {
  1744         type = "text";
  1746       newProp.type = type;
  1747       newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
  1748       container.setPropertyAsInterface(propName, newProp);
  1750     else {
  1751       container.setPropertyAsAString(propName, chars);
  1753   },
  1755   // Sometimes, we'll hand off SAX handling duties to an XHTMLHandler
  1756   // (see above) that will scrape out non-XHTML stuff, normalize
  1757   // namespaces, and remove the wrapper div from Atom 1.0. When the
  1758   // XHTMLHandler is done, it'll callback here.
  1759   returnFromXHTMLHandler:
  1760   function FP_returnFromXHTMLHandler(chars, uri, localName, qName) {
  1761     // retake control of the SAX content events
  1762     this._reader.contentHandler = this;
  1764     // Grab the top of the stack
  1765     var top = this._stack[this._stack.length - 1];
  1766     if (!top) 
  1767       return;
  1768     var container = top[0];
  1770     // Assign the property
  1771     var newProp =  newProp = Cc[TEXTCONSTRUCT_CONTRACTID].
  1772                    createInstance(Ci.nsIFeedTextConstruct);
  1773     newProp.text = chars;
  1774     newProp.type = "xhtml";
  1775     newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
  1776     container.setPropertyAsInterface(this._prefixForNS(uri) + localName,
  1777                                      newProp);
  1779     // XHTML will cause us to peek too far. The XHTML handler will
  1780     // send us an end element to call. RFC4287-valid feeds allow a
  1781     // more graceful way to handle this. Unfortunately, we can't count
  1782     // on compliance at this point.
  1783     this.endElement(uri, localName, qName);
  1784   },
  1786   // XPCOM stuff
  1787   classID: FP_CLASSID,
  1788   QueryInterface: XPCOMUtils.generateQI(
  1789     [Ci.nsIFeedProcessor, Ci.nsISAXContentHandler, Ci.nsISAXErrorHandler,
  1790      Ci.nsIStreamListener, Ci.nsIRequestObserver]
  // The component constructors handed to the factory generator below.
  var components = [
    FeedProcessor,
    FeedResult,
    Feed,
    Entry,
    TextConstruct,
    Generator,
    Person
  ];

  // Publish the factory getter generated from that list.
  this.NSGetFactory = XPCOMUtils.generateNSGetFactory(components);

mercurial