toolkit/components/feeds/FeedProcessor.js

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
/**
 * Debug logging helper: hands the message to dump(), wrapped in a
 * "*** " prefix and a trailing newline.
 *
 * @param str  The value to log; it is string-concatenated into the message.
 */
function LOG(str) {
  var line = "*** " + str + "\n";
  dump(line);
}
michael@0 9
michael@0 10 const Ci = Components.interfaces;
michael@0 11 const Cc = Components.classes;
michael@0 12 const Cr = Components.results;
michael@0 13 Components.utils.import("resource://gre/modules/XPCOMUtils.jsm");
michael@0 14
// XPCOM registration data (contract ID, class ID, class name) for each
// component implemented in this file.
const FP_CONTRACTID = "@mozilla.org/feed-processor;1";
const FP_CLASSID = Components.ID("{26acb1f0-28fc-43bc-867a-a46aabc85dd4}");
const FP_CLASSNAME = "Feed Processor";

const FR_CONTRACTID = "@mozilla.org/feed-result;1";
const FR_CLASSID = Components.ID("{072a5c3d-30c6-4f07-b87f-9f63d51403f2}");
const FR_CLASSNAME = "Feed Result";

const FEED_CONTRACTID = "@mozilla.org/feed;1";
const FEED_CLASSID = Components.ID("{5d0cfa97-69dd-4e5e-ac84-f253162e8f9a}");
const FEED_CLASSNAME = "Feed";

const ENTRY_CONTRACTID = "@mozilla.org/feed-entry;1";
const ENTRY_CLASSID = Components.ID("{8e4444ff-8e99-4bdd-aa7f-fb3c1c77319f}");
const ENTRY_CLASSNAME = "Feed Entry";

const TEXTCONSTRUCT_CONTRACTID = "@mozilla.org/feed-textconstruct;1";
const TEXTCONSTRUCT_CLASSID =
  Components.ID("{b992ddcd-3899-4320-9909-924b3e72c922}");
const TEXTCONSTRUCT_CLASSNAME = "Feed Text Construct";

const GENERATOR_CONTRACTID = "@mozilla.org/feed-generator;1";
const GENERATOR_CLASSID =
  Components.ID("{414af362-9ad8-4296-898e-62247f25a20e}");
const GENERATOR_CLASSNAME = "Feed Generator";

const PERSON_CONTRACTID = "@mozilla.org/feed-person;1";
const PERSON_CLASSID = Components.ID("{95c963b7-20b2-11db-92f6-001422106990}");
const PERSON_CLASSNAME = "Feed Person";

// Contract IDs of the platform services/components this file instantiates.
const IO_CONTRACTID = "@mozilla.org/network/io-service;1";
const BAG_CONTRACTID = "@mozilla.org/hash-property-bag;1";
const ARRAY_CONTRACTID = "@mozilla.org/array;1";
const SAX_CONTRACTID = "@mozilla.org/saxparser/xmlreader;1";
const PARSERUTILS_CONTRACTID = "@mozilla.org/parserutils;1";

// Lazily-fetched nsIIOService instance; filled in by strToURI() on first use.
var gIoService = null;

const XMLNS = "http://www.w3.org/XML/1998/namespace";
const RSS090NS = "http://my.netscape.com/rdf/simple/0.9/";
michael@0 50
michael@0 51 /***** Some general utils *****/
/**
 * Turns a URI string into a URI object using the IO service.
 *
 * @param link  The URI spec to parse.
 * @param base  Optional base URI for resolving relative specs; any falsy
 *              value is passed on as null.
 * @returns whatever gIoService.newURI() returns, or null if it throws.
 */
function strToURI(link, base) {
  // The IO service is fetched once and cached in the file-level global.
  if (!gIoService) {
    gIoService = Cc[IO_CONTRACTID].getService(Ci.nsIIOService);
  }
  var baseURI = base || null;
  try {
    return gIoService.newURI(link, null, baseURI);
  } catch (e) {
    return null;
  }
}
michael@0 63
/**
 * Tells whether a value is a JavaScript array.
 *
 * Uses Array.isArray rather than comparing a.constructor with this global's
 * Array: the constructor comparison reports false for arrays created in
 * another global and for arrays whose "constructor" property was shadowed.
 *
 * @param a  Any value.
 * @returns true if a is an array, false otherwise.
 */
function isArray(a) {
  return Array.isArray(a);
}
michael@0 67
/**
 * Tells whether a value is object-like: a non-null object or a function.
 *
 * @param a  Any value.
 * @returns true for functions and for truthy values whose typeof is
 *          "object"; false for everything else (including null).
 */
function isObject(a) {
  var kind = typeof a;
  if (kind == "function") {
    return true;
  }
  // typeof null is "object", so null has to be excluded explicitly.
  return kind == "object" && a !== null;
}
michael@0 71
/**
 * Tells whether a value is callable according to typeof.
 *
 * @param a  Any value.
 * @returns true if typeof a is "function".
 */
function isFunction(a) {
  var kind = typeof a;
  return kind == "function";
}
michael@0 75
/**
 * Tests whether an object can be QueryInterface'd to the given interface.
 *
 * @param a    The object to probe.
 * @param iid  The interface to ask for.
 * @returns true if a.QueryInterface(iid) completes, false if it throws
 *          for any reason (including a not having QueryInterface at all).
 */
function isIID(a, iid) {
  try {
    a.QueryInterface(iid);
  } catch (e) {
    return false;
  }
  return true;
}
michael@0 86
/**
 * Tells whether an object supports nsIArray, as determined by isIID().
 *
 * @param a  The object to probe.
 * @returns the boolean result of isIID(a, Ci.nsIArray).
 */
function isIArray(a) {
  var supportsArray = isIID(a, Ci.nsIArray);
  return supportsArray;
}
michael@0 90
/**
 * Tells whether an object supports nsIFeedContainer, as determined by
 * isIID().
 *
 * @param a  The object to probe.
 * @returns the boolean result of isIID(a, Ci.nsIFeedContainer).
 */
function isIFeedContainer(a) {
  var supportsContainer = isIID(a, Ci.nsIFeedContainer);
  return supportsContainer;
}
michael@0 94
/**
 * Removes every "<...>" run (a "<", one or more non-">" characters, and a
 * ">") from a string.
 *
 * @param someHTML  The markup string to strip.
 * @returns the string with those runs deleted.
 */
function stripTags(someHTML) {
  var tagPattern = /<[^>]+>/g;
  return someHTML.replace(tagPattern, "");
}
michael@0 98
/**
 * Searches through an array of links and returns a JS array
 * of matching property bags.
 *
 * A link matches when it has an "href" and its "rel" is either the
 * requested relation or IANA_URI followed by that relation.  A link with
 * no (or an empty) "rel" counts as rel="alternate".
 *
 * @param rel    The relation name to look for, e.g. "alternate".
 * @param links  An array exposing length and queryElementAt(); elements are
 *               fetched as nsIPropertyBag2.
 * @returns a JS array of the matching link bags, in their original order.
 */
const IANA_URI = "http://www.iana.org/assignments/relation/";
function findAtomLinks(rel, links) {
  var matches = [];
  for (var idx = 0; idx < links.length; ++idx) {
    var bag = links.queryElementAt(idx, Ci.nsIPropertyBag2);

    // atom:link MUST have @href
    if (!bagHasKey(bag, "href")) {
      continue;
    }

    var relValue = bagHasKey(bag, "rel") ? bag.getPropertyAsAString("rel")
                                         : null;
    var isImplicitAlternate = !relValue && rel == "alternate";

    // The last clause catches relations specified by IANA URI.
    if (isImplicitAlternate || relValue == rel ||
        relValue == IANA_URI + rel) {
      matches.push(bag);
    }
  }
  return matches;
}
michael@0 125
/**
 * Escapes the five XML special characters in a string.
 *
 * @param s  The string to escape.
 * @returns s with & > < " ' replaced by their entity references.
 */
function xmlEscape(s) {
  // "&" must go first so the ampersands introduced by the later
  // replacements are not escaped a second time.
  return s.replace(/&/g, "&amp;")
          .replace(/>/g, "&gt;")
          .replace(/</g, "&lt;")
          .replace(/"/g, "&quot;")
          .replace(/'/g, "&apos;");
}
michael@0 134
/**
 * Linear search for an element in an array-like object.
 *
 * @param array    Anything with a length and integer-indexed members.
 * @param element  The value to look for; compared with loose equality (==).
 * @returns true if some member == element, false otherwise.
 */
function arrayContains(array, element) {
  var idx = 0;
  while (idx < array.length) {
    if (array[idx] == element) {
      return true;
    }
    ++idx;
  }
  return false;
}
michael@0 143
// XXX add hasKey to nsIPropertyBag
/**
 * Tells whether a property bag holds the given key, by attempting to read
 * it.
 *
 * @param bag  The property bag to probe.
 * @param key  The property name.
 * @returns true if bag.getProperty(key) completes, false if it throws for
 *          any reason.
 */
function bagHasKey(bag, key) {
  var found = true;
  try {
    bag.getProperty(key);
  } catch (e) {
    found = false;
  }
  return found;
}
michael@0 154
/**
 * Builds a function that reads one fixed key out of a property bag.
 *
 * The original body did `return value = bag.getProperty(key)`, assigning to
 * an undeclared `value`.  In sloppy mode that leaks a global; in strict mode
 * the assignment throws, the catch swallows it, and the getter always
 * returns null.  The value is now returned directly.
 *
 * @param key  The property name the returned getter will read.
 * @returns a function (bag) that returns bag.getProperty(key), or null if
 *          that call throws.
 */
function makePropGetter(key) {
  return function FeedPropGetter(bag) {
    try {
      return bag.getProperty(key);
    } catch (e) {
      // The bag has no such property (or is unusable): report "no value".
      return null;
    }
  };
}
michael@0 165
const RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

// Namespace map: namespace URI -> the short prefix used for it in this file.
// The two RSS 2.0 namespaces map to the empty prefix.
var gNamespaces = {
  "http://webns.net/mvcb/": "admin",
  "http://backend.userland.com/rss": "",
  "http://blogs.law.harvard.edu/tech/rss": "",
  "http://www.w3.org/2005/Atom": "atom",
  "http://purl.org/atom/ns#": "atom03",
  "http://purl.org/rss/1.0/modules/content/": "content",
  "http://purl.org/dc/elements/1.1/": "dc",
  "http://purl.org/dc/terms/": "dcterms",
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
  "http://purl.org/rss/1.0/": "rss1",
  "http://my.netscape.com/rdf/simple/0.9/": "rss1",
  "http://wellformedweb.org/CommentAPI/": "wfw",
  "http://purl.org/rss/1.0/modules/wiki/": "wiki",
  "http://www.w3.org/XML/1998/namespace": "xml",
  // Media RSS is listed both with and without the trailing slash.
  "http://search.yahoo.com/mrss/": "media",
  "http://search.yahoo.com/mrss": "media"
};
michael@0 186
// We allow a very small set of namespaces in XHTML content,
// for attributes only
var gAllowedXHTMLNamespaces = {
  "http://www.w3.org/XML/1998/namespace": "xml",

  // if someone ns qualifies XHTML, we have to prefix it to avoid an
  // attribute collision.
  "http://www.w3.org/1999/xhtml": "xhtml"
};
michael@0 195
/**
 * FeedResult: this file's nsIFeedResult implementation.  The constructor
 * does nothing; every field starts from the prototype defaults below.
 */
function FeedResult() {}

FeedResult.prototype = {
  // Default field values shared by all instances until overwritten.
  bozo: false,
  doc: null,
  version: null,
  headers: null,
  uri: null,
  stylesheet: null,

  /**
   * Not supported: always throws Cr.NS_ERROR_NOT_IMPLEMENTED.
   */
  registerExtensionPrefix: function FR_registerExtensionPrefix(ns, prefix) {
    throw Cr.NS_ERROR_NOT_IMPLEMENTED;
  },

  // XPCOM stuff
  classID: FR_CLASSID,
  QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeedResult])
};
michael@0 213
/**
 * Feed: this file's nsIFeed / nsIFeedContainer implementation.
 *
 * The constructor only sets defaults.  normalize() later copies values out
 * of this.fields (a property bag that is not assigned in this constructor)
 * onto the object, using the searchLists table.
 */
function Feed() {
  this.subtitle = null;
  this.title = null;
  this.items = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
  this.link = null;
  this.id = null;
  this.generator = null;
  this.authors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
  this.contributors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
  this.baseURI = null;
  this.enclosureCount = 0;
  this.type = Ci.nsIFeed.TYPE_FEED;
}

Feed.prototype = {
  // For each Feed property: the field names fieldsToObj() looks up in
  // this.fields.  A [name, fn] pair means the raw value is passed through
  // fn before being stored.  (The original literal listed "title" twice
  // with identical contents; the redundant second entry has been removed.)
  searchLists: {
    title: ["title", "rss1:title", "atom03:title", "atom:title"],
    subtitle: ["description", "dc:description", "rss1:description",
               "atom03:tagline", "atom:subtitle"],
    items: ["items", "atom03_entries", "entries"],
    id: ["atom:id", "rdf:about"],
    generator: ["generator"],
    authors: ["authors"],
    contributors: ["contributors"],
    link: [["link", strToURI], ["rss1:link", strToURI]],
    categories: ["categories", "dc:subject"],
    rights: ["atom03:rights", "atom:rights"],
    cloud: ["cloud"],
    image: ["image", "rss1:image", "atom:logo"],
    textInput: ["textInput", "rss1:textinput"],
    skipDays: ["skipDays"],
    skipHours: ["skipHours"],
    updated: ["pubDate", "lastBuildDate", "atom03:modified", "dc:date",
              "dcterms:modified", "atom:updated"]
  },

  /**
   * Populates this object's properties from this.fields and post-processes
   * them: unwraps skipDays/skipHours, parses the updated date, derives the
   * link from Atom links, computes the enclosure count and feed type,
   * resolves the image URL, and resets title/subtitle bag members to text.
   */
  normalize: function Feed_normalize() {
    fieldsToObj(this, this.searchLists);
    if (this.skipDays)
      this.skipDays = this.skipDays.getProperty("days");
    if (this.skipHours)
      this.skipHours = this.skipHours.getProperty("hours");

    if (this.updated)
      this.updated = dateParse(this.updated);

    // Assign Atom link if needed
    if (bagHasKey(this.fields, "links"))
      this._atomLinksToURI();

    this._calcEnclosureCountAndFeedType();

    // Resolve relative image links
    if (this.image && bagHasKey(this.image, "url"))
      this._resolveImageLink();

    this._resetBagMembersToRawText([this.searchLists.subtitle,
                                    this.searchLists.title]);
  },

  /**
   * Walks every entry's enclosures, tallies them by media type, and sets
   * this.type and this.enclosureCount from the tallies.
   */
  _calcEnclosureCountAndFeedType: function Feed_calcEnclosureCountAndFeedType() {
    var entries_with_enclosures = 0;
    var audio_count = 0;
    var image_count = 0;
    var video_count = 0;
    var other_count = 0;

    for (var i = 0; i < this.items.length; ++i) {
      var entry = this.items.queryElementAt(i, Ci.nsIFeedEntry);
      entry.QueryInterface(Ci.nsIFeedContainer);

      if (entry.enclosures && entry.enclosures.length > 0) {
        ++entries_with_enclosures;

        for (var e = 0; e < entry.enclosures.length; ++e) {
          var enc = entry.enclosures.queryElementAt(e, Ci.nsIWritablePropertyBag2);
          if (enc.hasKey("type")) {
            var enctype = enc.get("type");

            if (/^audio/.test(enctype)) {
              ++audio_count;
            } else if (/^image/.test(enctype)) {
              ++image_count;
            } else if (/^video/.test(enctype)) {
              ++video_count;
            } else {
              ++other_count;
            }
          } else {
            // An enclosure without a type counts as "other".
            ++other_count;
          }
        }
      }
    }

    var feedtype = Ci.nsIFeed.TYPE_FEED;

    // For a feed to be marked as TYPE_VIDEO, TYPE_AUDIO and TYPE_IMAGE,
    // we enforce two things:
    //
    //    1. all entries must have at least one enclosure
    //    2. all enclosures must be video for TYPE_VIDEO, audio for TYPE_AUDIO or image
    //       for TYPE_IMAGE
    //
    // Otherwise it's a TYPE_FEED.
    if (entries_with_enclosures == this.items.length && other_count == 0) {
      if (audio_count > 0 && !video_count && !image_count) {
        feedtype = Ci.nsIFeed.TYPE_AUDIO;

      } else if (image_count > 0 && !audio_count && !video_count) {
        feedtype = Ci.nsIFeed.TYPE_IMAGE;

      } else if (video_count > 0 && !audio_count && !image_count) {
        feedtype = Ci.nsIFeed.TYPE_VIDEO;
      }
    }

    this.type = feedtype;
    this.enclosureCount = other_count + video_count + audio_count + image_count;
  },

  /**
   * Sets this.link from the first rel="alternate" Atom link in
   * this.fields, resolved against the link's own xml:base when present.
   */
  _atomLinksToURI: function Feed_linkToURI() {
    var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray);
    var alternates = findAtomLinks("alternate", links);
    if (alternates.length > 0) {
      var href = alternates[0].getPropertyAsAString("href");
      var base;
      if (bagHasKey(alternates[0], "xml:base"))
        base = alternates[0].getPropertyAsAString("xml:base");
      this.link = this._resolveURI(href, base);
    }
  },

  /**
   * Rewrites this.image's "url" property to the resolved URI spec, when the
   * URL can be resolved.
   */
  _resolveImageLink: function Feed_resolveImageLink() {
    var base;
    if (bagHasKey(this.image, "xml:base"))
      base = this.image.getPropertyAsAString("xml:base");
    var url = this._resolveURI(this.image.getPropertyAsAString("url"), base);
    if (url)
      this.image.setPropertyAsAString("url", url.spec);
  },

  /**
   * Resolves linkSpec to a URI.
   *
   * @param linkSpec  The (possibly relative) URI string.
   * @param baseSpec  Optional base string; when given it is itself resolved
   *                  against this.baseURI first.
   * @returns the result of strToURI(), or null if something throws.
   */
  _resolveURI: function Feed_resolveURI(linkSpec, baseSpec) {
    var uri = null;
    try {
      var base = baseSpec ? strToURI(baseSpec, this.baseURI) : this.baseURI;
      uri = strToURI(linkSpec, base);
    }
    catch(e) {
      LOG(e);
    }

    return uri;
  },

  // reset the bag to raw contents, not text constructs
  /**
   * @param fieldLists  An array of field-name arrays; every named field
   *                    present in this.fields is replaced by the .text of
   *                    the value currently stored there.
   */
  _resetBagMembersToRawText: function Feed_resetBagMembers(fieldLists) {
    for (var i=0; i<fieldLists.length; i++) {
      for (var j=0; j<fieldLists[i].length; j++) {
        if (bagHasKey(this.fields, fieldLists[i][j])) {
          var textConstruct = this.fields.getProperty(fieldLists[i][j]);
          this.fields.setPropertyAsAString(fieldLists[i][j],
                                           textConstruct.text);
        }
      }
    }
  },

  // XPCOM stuff
  classID: FEED_CLASSID,
  QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeed, Ci.nsIFeedContainer])
};
michael@0 387
/**
 * Entry: this file's nsIFeedEntry / nsIFeedContainer implementation.
 *
 * The constructor sets defaults and creates the "fields" property bag that
 * normalize() later reads from.
 */
function Entry() {
  this.summary = null;
  this.content = null;
  this.title = null;
  // Uses BAG_CONTRACTID like the rest of this class instead of repeating
  // the contract ID string literal.
  this.fields = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
  this.link = null;
  this.id = null;
  this.baseURI = null;
  this.updated = null;
  this.published = null;
  this.authors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
  this.contributors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
}

Entry.prototype = {
  fields: null,
  enclosures: null,
  mediaContent: null,

  // For each Entry property: the field names fieldsToObj() looks up in
  // this.fields.  A [name, fn] pair means the raw value is passed through
  // fn before being stored.
  searchLists: {
    title: ["title", "rss1:title", "atom03:title", "atom:title"],
    link: [["link", strToURI], ["rss1:link", strToURI]],
    id: [["guid", makePropGetter("guid")], "rdf:about",
         "atom03:id", "atom:id"],
    authors: ["authors"],
    contributors: ["contributors"],
    summary: ["description", "rss1:description", "dc:description",
              "atom03:summary", "atom:summary"],
    content: ["content:encoded", "atom03:content", "atom:content"],
    rights: ["atom03:rights", "atom:rights"],
    published: ["pubDate", "atom03:issued", "dcterms:issued", "atom:published"],
    updated: ["pubDate", "atom03:modified", "dc:date", "dcterms:modified",
              "atom:updated"]
  },

  /**
   * Populates this object's properties from this.fields and post-processes
   * them: derives the link from Atom links or a permalink guid, collects
   * enclosures, parses the dates, and resets content/summary/title bag
   * members to text.
   */
  normalize: function Entry_normalize() {
    fieldsToObj(this, this.searchLists);

    // Assign Atom link if needed
    if (bagHasKey(this.fields, "links"))
      this._atomLinksToURI();

    // Populate enclosures array
    this._populateEnclosures();

    // The link might be a guid w/ permalink=true
    if (!this.link && bagHasKey(this.fields, "guid")) {
      var guid = this.fields.getProperty("guid");
      var isPermaLink = true;

      if (bagHasKey(guid, "isPermaLink"))
        isPermaLink = guid.getProperty("isPermaLink").toLowerCase() != "false";

      if (guid && isPermaLink)
        this.link = strToURI(guid.getProperty("guid"));
    }

    if (this.updated)
      this.updated = dateParse(this.updated);
    if (this.published)
      this.published = dateParse(this.published);

    this._resetBagMembersToRawText([this.searchLists.content,
                                    this.searchLists.summary,
                                    this.searchLists.title]);
  },

  /**
   * Gathers enclosures from every supported source present in this.fields.
   */
  _populateEnclosures: function Entry_populateEnclosures() {
    if (bagHasKey(this.fields, "links"))
      this._atomLinksToEnclosures();

    // Add RSS2 enclosure to enclosures
    if (bagHasKey(this.fields, "enclosure"))
      this._enclosureToEnclosures();

    // Add media:content to enclosures
    if (bagHasKey(this.fields, "mediacontent"))
      this._mediacontentToEnclosures();

    // Add media:content in media:group to enclosures
    if (bagHasKey(this.fields, "mediagroup"))
      this._mediagroupToEnclosures();
  },

  // url -> enclosure bag already added; created on first use by
  // _addToEnclosures() and used to merge duplicates.
  __enclosure_map: null,

  /**
   * Adds an enclosure bag to this.enclosures, unless it has no usable url.
   * If an enclosure with the same url was added before, nothing is
   * appended; instead the earlier one gets any "type"/"length" it lacked.
   *
   * @param new_enc  A property bag describing the enclosure.
   */
  _addToEnclosures: function Entry_addToEnclosures(new_enc) {
    // items we add to the enclosures array get displayed in the FeedWriter and
    // they must have non-empty urls.
    if (!bagHasKey(new_enc, "url") || new_enc.getPropertyAsAString("url") == "")
      return;

    if (this.__enclosure_map == null)
      this.__enclosure_map = {};

    var previous_enc = this.__enclosure_map[new_enc.getPropertyAsAString("url")];

    if (previous_enc != undefined) {
      previous_enc.QueryInterface(Ci.nsIWritablePropertyBag2);

      if (!bagHasKey(previous_enc, "type") && bagHasKey(new_enc, "type"))
        previous_enc.setPropertyAsAString("type", new_enc.getPropertyAsAString("type"));

      if (!bagHasKey(previous_enc, "length") && bagHasKey(new_enc, "length"))
        previous_enc.setPropertyAsAString("length", new_enc.getPropertyAsAString("length"));

      return;
    }

    if (this.enclosures == null) {
      this.enclosures = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray);
      this.enclosures.QueryInterface(Ci.nsIMutableArray);
    }

    this.enclosures.appendElement(new_enc, false);
    this.__enclosure_map[new_enc.getPropertyAsAString("url")] = new_enc;
  },

  /**
   * Converts every Atom rel="enclosure" link in this.fields into an
   * enclosure bag (href -> url, plus type and length when present).
   */
  _atomLinksToEnclosures: function Entry_linkToEnclosure() {
    var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray);
    var enc_links = findAtomLinks("enclosure", links);

    for (var i = 0; i < enc_links.length; ++i) {
      var link = enc_links[i];

      // an enclosure must have an href; skip only this link (the original
      // returned here, dropping every later enclosure link as well)
      if (!(link.getProperty("href")))
        continue;

      var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);

      // copy Atom bits over to equivalent enclosure bits
      enc.setPropertyAsAString("url", link.getPropertyAsAString("href"));
      if (bagHasKey(link, "type"))
        enc.setPropertyAsAString("type", link.getPropertyAsAString("type"));
      if (bagHasKey(link, "length"))
        enc.setPropertyAsAString("length", link.getPropertyAsAString("length"));

      this._addToEnclosures(enc);
    }
  },

  /**
   * Adds the RSS2 "enclosure" field, as is, to the enclosures.
   */
  _enclosureToEnclosures: function Entry_enclosureToEnclosures() {
    var enc = this.fields.getPropertyAsInterface("enclosure", Ci.nsIPropertyBag2);

    // Probe with bagHasKey: calling getProperty("url") directly throws
    // when the key is absent, which is what bagHasKey exists to absorb.
    if (!bagHasKey(enc, "url") || !(enc.getProperty("url")))
      return;

    this._addToEnclosures(enc);
  },

  /**
   * Shared worker for the two media:content sources: converts each element
   * of contentArray that has a url into an enclosure bag (url, plus type
   * and fileSize -> length when present) and adds it.
   *
   * @param contentArray  An array exposing length and queryElementAt().
   */
  _mediaContentArrayToEnclosures:
  function Entry_mediaContentArrayToEnclosures(contentArray) {
    for (var i = 0; i < contentArray.length; ++i) {
      var contentElement = contentArray.queryElementAt(i, Ci.nsIWritablePropertyBag2);

      // media:content don't require url, but if it's not there, we should
      // skip it.
      if (!bagHasKey(contentElement, "url"))
        continue;

      var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);

      // copy media:content bits over to equivalent enclosure bits
      enc.setPropertyAsAString("url", contentElement.getPropertyAsAString("url"));
      if (bagHasKey(contentElement, "type")) {
        enc.setPropertyAsAString("type", contentElement.getPropertyAsAString("type"));
      }
      if (bagHasKey(contentElement, "fileSize")) {
        enc.setPropertyAsAString("length", contentElement.getPropertyAsAString("fileSize"));
      }

      this._addToEnclosures(enc);
    }
  },

  /**
   * Adds the entry-level media:content elements to the enclosures.
   */
  _mediacontentToEnclosures: function Entry_mediacontentToEnclosures() {
    var mediacontent = this.fields.getPropertyAsInterface("mediacontent", Ci.nsIArray);
    this._mediaContentArrayToEnclosures(mediacontent);
  },

  /**
   * Adds the media:content elements nested in media:group to the
   * enclosures.
   */
  _mediagroupToEnclosures: function Entry_mediagroupToEnclosures() {
    var group = this.fields.getPropertyAsInterface("mediagroup", Ci.nsIPropertyBag2);

    // A group without a "mediacontent" member has nothing to add; reading
    // the missing key would throw.
    if (!bagHasKey(group, "mediacontent"))
      return;

    var content = group.getPropertyAsInterface("mediacontent", Ci.nsIArray);
    this._mediaContentArrayToEnclosures(content);
  },

  // XPCOM stuff
  classID: ENTRY_CLASSID,
  QueryInterface: XPCOMUtils.generateQI(
    [Ci.nsIFeedEntry, Ci.nsIFeedContainer]
  )
};
michael@0 600
// Entry reuses these three helpers from Feed.prototype unchanged.
["_atomLinksToURI", "_resolveURI", "_resetBagMembersToRawText"].forEach(
  function (methodName) {
    Entry.prototype[methodName] = Feed.prototype[methodName];
  });
michael@0 605
// TextConstruct represents an element that could contain (X)HTML
function TextConstruct() {
  this.lang = null;
  this.base = null;
  this.type = "text";
  this.text = null;
  this.parserUtils = Cc[PARSERUTILS_CONTRACTID].getService(Ci.nsIParserUtils);
}

TextConstruct.prototype = {
  /**
   * Returns the content as plain text.  "text" constructs return this.text
   * untouched; any other type is run through stripTags() and then the
   * parser utils' convertToPlainText().
   */
  plainText: function TC_plainText() {
    if (this.type == "text") {
      return this.text;
    }
    var encoderFlags = Ci.nsIDocumentEncoder.OutputSelectionOnly |
                       Ci.nsIDocumentEncoder.OutputAbsoluteLinks;
    return this.parserUtils.convertToPlainText(stripTags(this.text),
                                               encoderFlags, 0);
  },

  /**
   * Builds a document fragment for the content.
   *
   * @param element  The context element; its ownerDocument creates the
   *                 nodes for "text" constructs, and it is handed to
   *                 parseFragment() for (X)HTML constructs.
   * @returns a fragment holding one text node for type "text", the result
   *          of parseFragment() for "xhtml"/"html", or null for any other
   *          type.
   */
  createDocumentFragment: function TC_createDocumentFragment(element) {
    var type = this.type;

    if (type == "text") {
      var ownerDoc = element.ownerDocument;
      var fragment = ownerDoc.createDocumentFragment();
      fragment.appendChild(ownerDoc.createTextNode(this.text));
      return fragment;
    }

    if (type != "xhtml" && type != "html") {
      return null;
    }

    // "xhtml" is parsed as XML, "html" as HTML.
    var isXML = type == "xhtml";
    return this.parserUtils.parseFragment(this.text, 0, isXML,
                                          this.base, element);
  },

  // XPCOM stuff
  classID: TEXTCONSTRUCT_CLASSID,
  QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeedTextConstruct])
};
michael@0 650
// Generator represents the software that produced the feed
function Generator() {
  this.lang = null;
  this.agent = null;
  this.version = null;
  this.uri = null;

  // nsIFeedElementBase
  this._attributes = null;
  this.baseURI = null;
}

Generator.prototype = {

  get attributes() {
    return this._attributes;
  },

  /**
   * Stores the attribute set and derives version, uri and (for RSS1) agent
   * from it.
   *
   * @param value  An object exposing getValueFromName(namespace, name).
   */
  set attributes(value) {
    this._attributes = value;
    var attrs = value;

    this.version = attrs.getValueFromName("", "version");

    // The generator URI may be spelled "uri" or "url".
    var plainURI = attrs.getValueFromName("", "uri") ||
                   attrs.getValueFromName("", "url");
    this.uri = strToURI(plainURI, this.baseURI);

    // RSS1: an rdf:resource attribute overrides both agent and uri.
    var rdfResource = attrs.getValueFromName(RDF_NS, "resource");
    if (rdfResource) {
      this.agent = rdfResource;
      this.uri = strToURI(rdfResource, this.baseURI);
    }
  },

  // XPCOM stuff
  classID: GENERATOR_CLASSID,
  QueryInterface: XPCOMUtils.generateQI(
    [Ci.nsIFeedGenerator, Ci.nsIFeedElementBase]
  )
};
michael@0 690
/**
 * Person: this file's nsIFeedPerson / nsIFeedElementBase implementation.
 * All fields start out null.
 */
function Person() {
  this.name = null;
  this.uri = null;
  this.email = null;

  // nsIFeedElementBase
  this.attributes = null;
  this.baseURI = null;
}

Person.prototype = {
  // XPCOM stuff
  classID: PERSON_CLASSID,
  QueryInterface: XPCOMUtils.generateQI(
    [Ci.nsIFeedPerson, Ci.nsIFeedElementBase]
  )
};
michael@0 708
/**
 * Map a list of fields into properties on a container.
 *
 * Every name in a search list is tried, in order, and each truthy value
 * found is assigned; when several names are present, the last one wins.
 *
 * @param container An nsIFeedContainer
 * @param fields    A list of fields to search for. List members can
 *                  be a list, in which case the second member is
 *                  transformation function (like parseInt).
 */
function fieldsToObj(container, fields) {
  for (var key in fields) {
    var candidates = fields[key];
    for (var idx = 0; idx < candidates.length; ++idx) {
      var candidate = candidates[idx];
      var hasTransform = isArray(candidate);
      var fieldName = hasTransform ? candidate[0] : candidate;

      var value = null;
      try {
        value = container.fields.getProperty(fieldName);
      } catch (e) {
        // The field is not in the bag; value stays null.
      }

      if (!value) {
        continue;
      }
      container[key] = hasTransform ? candidate[1](value) : value;
    }
  }
}
michael@0 737
/**
 * Lower cases an element's localName property
 * @param   element A DOM element.
 *
 * @returns The lower case localName property of the specified element
 */
function LC(element) {
  var localName = element.localName;
  return localName.toLowerCase();
}
michael@0 747
// TODO move these post-processor functions
/**
 * Post-processor for a generator element: stores the trimmed element text
 * as the generator's agent.
 *
 * @param s          The element's text content.
 * @param generator  The generator object; QueryInterface'd to
 *                   nsIFeedGenerator first.
 * @returns the same generator object.
 */
function atomGenerator(s, generator) {
  generator.QueryInterface(Ci.nsIFeedGenerator);
  var agentName = s.trim();
  generator.agent = agentName;
  return generator;
}
michael@0 755
// post-process atom:logo to create an RSS2-like structure
/**
 * @param s     The atom:logo text content.
 * @param logo  A writable property bag; receives the trimmed text as its
 *              "url" property.  Nothing is returned.
 */
function atomLogo(s, logo) {
  var logoURL = s.trim();
  logo.setPropertyAsAString("url", logoURL);
}
michael@0 760
// post-process an RSS category, map it to the Atom fields.
/**
 * @param s    The category element's text content.
 * @param cat  A writable property bag; receives the trimmed text as its
 *             "term" property.
 * @returns the same bag.
 */
function rssCatTerm(s, cat) {
  // add slash handling?
  var term = s.trim();
  cat.setPropertyAsAString("term", term);
  return cat;
}
michael@0 767
// post-process a GUID
/**
 * @param s     The guid element's text content.
 * @param guid  A writable property bag; receives the trimmed text as its
 *              "guid" property.
 * @returns the same bag.
 */
function rssGuid(s, guid) {
  var guidText = s.trim();
  guid.setPropertyAsAString("guid", guidText);
  return guid;
}
michael@0 773
// Post-process an RSS author element.
//
// It can contain a field like this:
//
//    <author>lawyer@boyer.net (Lawyer Boyer)</author>
//
// or, delightfully, a field like this:
//
//    <dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
//
// We want to split this up and assign it to the corresponding Atom
// fields (name and email).
//
// @param s       The text content of the author element.
// @param author  The person object to fill in; it is QueryInterface'd to
//                nsIFeedPerson and returned.
// @returns       The author object that was passed in.
//
function rssAuthor(s, author) {
  author.QueryInterface(Ci.nsIFeedPerson);

  var chars = s.trim();
  var emailCheck =
    /^([a-zA-Z0-9_\.\-])+\@(([a-zA-Z0-9\-])+\.)+([a-zA-Z0-9]{2,4})+$/;

  // check for RSS2 string format: "something (something else)"
  var matches = chars.match(/(.*)\((.*)\)/);
  if (!matches) {
    author.name = chars;
    // Only treat the text as an email address when it contains an "@".
    // indexOf returns -1 (which is truthy) when there is no match, so the
    // result has to be compared against -1 explicitly.
    if (chars.indexOf("@") != -1)
      author.email = chars;
    return author;
  }

  var match1 = matches[1].trim();
  var match2 = matches[2].trim();
  if (match2.indexOf("mailto:") == 0)
    match2 = match2.substring(7);

  if (emailCheck.test(match1)) {
    author.email = match1;
    author.name = match2;
  }
  else if (emailCheck.test(match2)) {
    author.email = match2;
    author.name = match1;
  }
  else {
    // neither half looks like an email address; put it back together
    author.name = match1 + " (" + match2 + ")";
  }
  return author;
}
michael@0 819
//
// skipHours and skipDays map to arrays, so the string has to be wrapped
// in an nsISupportsString (an nsISupports) before it can be stored there.
//
function rssArrayElement(s) {
  var factory = Cc["@mozilla.org/supports-string;1"];
  var wrapped = factory.createInstance(Ci.nsISupportsString);
  wrapped.data = s;
  wrapped.QueryInterface(Ci.nsISupportsString);
  return wrapped;
}
michael@0 831
/**
 * Tries parsing a string through the JavaScript Date object.
 * @param aDateString
 *        A string that is supposedly an RFC822 or RFC3339 date.
 *        Surrounding whitespace is ignored.
 * @return The Date.toUTCString form of the date, or null if the string
 *         can't be parsed.
 */
function dateParse(aDateString) {
  // Without bug 682781 fixed, JS won't parse an RFC822 date with a Z for the
  // timezone, so a trailing "Z"/"z" is rewritten to -00:00, which works for
  // any date format.
  let normalized = aDateString.trim().replace(/z$/i, "-00:00");
  let parsed = new Date(normalized);
  return isNaN(parsed) ? null : parsed.toUTCString();
}
michael@0 849
const XHTML_NS = "http://www.w3.org/1999/xhtml";

/**
 * The XHTMLHandler handles inline XHTML found in things like atom:summary.
 * It serializes the XHTML elements and character data it is fed into a
 * string, and hands that string to the processor once the element that
 * contained the XHTML is closed.
 *
 * @param processor
 *        The object to report back to; its returnFromXHTMLHandler method
 *        is called with (text, uri, localName, qName).
 * @param isAtom
 *        When true, a depth-1 element named "div" is treated as a wrapper
 *        that is not part of the content (see startElement).
 */
function XHTMLHandler(processor, isAtom) {
  this._buf = "";
  this._processor = processor;
  this._depth = 0;
  this._isAtom = isAtom;
  // a stack of lists tracking in-scope namespaces
  this._inScopeNS = [];
}

// The fidelity can be improved here, to allow handling of stuff like
// SVG and MathML. XXX
XHTMLHandler.prototype = {

  // Look back up at the declared namespaces to see whether |ns| has
  // already been written out by an open element.
  // We always use the same prefixes for our safe stuff.
  _isInScope: function XH__isInScope(ns) {
    // Indexed iteration (rather than for...in) so that only real array
    // entries are inspected, never enumerable properties inherited from
    // Array.prototype.
    for (var i = 0; i < this._inScopeNS.length; ++i) {
      if (this._inScopeNS[i].indexOf(ns) != -1)
        return true;
    }
    return false;
  },

  startDocument: function XH_startDocument() {
  },
  endDocument: function XH_endDocument() {
  },
  startElement: function XH_startElement(uri, localName, qName, attributes) {
    ++this._depth;
    this._inScopeNS.push([]);

    // RFC4287 requires XHTML to be wrapped in a div that is *not* part of
    // the content. This prevents people from screwing up namespaces, but
    // we need to skip it here.
    if (this._isAtom && this._depth == 1 && localName == "div")
      return;

    // If it's an XHTML element, record it. Otherwise, it's ignored.
    if (uri != XHTML_NS)
      return;

    this._buf += "<" + localName;
    for (var i = 0; i < attributes.length; ++i) {
      // Kept in its own variable so the element's |uri| parameter is not
      // overwritten while walking the attributes.
      var attrURI = attributes.getURI(i);

      // XHTML attributes aren't in a namespace
      if (attrURI == "") {
        this._buf += (" " + attributes.getLocalName(i) + "='" +
                      xmlEscape(attributes.getValue(i)) + "'");
        continue;
      }

      // write a small set of allowed attribute namespaces
      var prefix = gAllowedXHTMLNamespaces[attrURI];
      if (prefix == null)
        continue;

      // it's an allowed attribute NS; write the attribute
      this._buf += (" " + prefix + ":" +
                    attributes.getLocalName(i) +
                    "='" + xmlEscape(attributes.getValue(i)) + "'");

      // write an xmlns declaration if necessary
      if (prefix != "xml" && !this._isInScope(attrURI)) {
        this._inScopeNS[this._inScopeNS.length - 1].push(attrURI);
        this._buf += " xmlns:" + prefix + "='" + attrURI + "'";
      }
    }
    this._buf += ">";
  },
  endElement: function XH_endElement(uri, localName, qName) {
    --this._depth;
    this._inScopeNS.pop();

    // We need to skip outer divs in Atom. See comment in startElement.
    if (this._isAtom && this._depth == 0 && localName == "div")
      return;

    // When we peek too far, go back to the main processor
    if (this._depth < 0) {
      this._processor.returnFromXHTMLHandler(this._buf.trim(),
                                             uri, localName, qName);
      return;
    }
    // If it's an XHTML element, record it. Otherwise, it's ignored.
    if (uri == XHTML_NS) {
      this._buf += "</" + localName + ">";
    }
  },
  characters: function XH_characters(data) {
    this._buf += xmlEscape(data);
  },
  startPrefixMapping: function XH_startPrefixMapping(prefix, uri) {
  },
  endPrefixMapping: function XH_endPrefixMapping(prefix) {
  },
  processingInstruction: function XH_processingInstruction() {
  },
};
michael@0 955
/**
 * The ExtensionHandler deals with elements we haven't explicitly
 * added to our transition table in the FeedProcessor.
 *
 * It remembers the outermost extension element it is given and buffers
 * that element's text. When the outermost element closes, it calls the
 * processor's returnFromExtHandler(uri, localName, text, attrs), where
 * text is null if the element contained child elements.
 *
 * @param processor  The FeedProcessor to report back to.
 */
function ExtensionHandler(processor) {
  this._buf = "";
  this._depth = 0;
  this._hasChildElements = false;

  // The FeedProcessor
  this._processor = processor;

  // Fields of the outermost extension element.
  this._localName = null;
  this._uri = null;
  this._qName = null;
  this._attrs = null;
}

ExtensionHandler.prototype = {
  startDocument: function EH_startDocument() {
  },
  endDocument: function EH_endDocument() {
  },
  startElement: function EH_startElement(uri, localName, qName, attrs) {
    ++this._depth;

    // Only the outermost element is remembered for the report.
    if (this._depth == 1) {
      this._uri = uri;
      this._localName = localName;
      this._qName = qName;
      this._attrs = attrs;
    }

    // if we descend into another element, we won't send text
    this._hasChildElements = (this._depth > 1);
  },
  endElement: function EH_endElement(uri, localName, qName) {
    --this._depth;
    if (this._depth == 0) {
      var text = this._hasChildElements ? null : this._buf.trim();
      this._processor.returnFromExtHandler(this._uri, this._localName,
                                           text, this._attrs);
    }
  },
  characters: function EH_characters(data) {
    if (!this._hasChildElements)
      this._buf += data;
  },
  startPrefixMapping: function EH_startPrefixMapping() {
  },
  endPrefixMapping: function EH_endPrefixMapping() {
  },
  processingInstruction: function EH_processingInstruction() {
  },
};
michael@0 1015
michael@0 1016
/**
 * ElementInfo is a simple container object that describes
 * some characteristics of a feed element. For example, it
 * says whether an element can be expected to appear more
 * than once inside a given entry or feed.
 *
 * @param fieldName       Stored as this.fieldName.
 * @param containerClass  Stored as this.containerClass (may be null).
 * @param closeFunc       Stored as this.closeFunc (may be null).
 * @param isArray         Stored as this.isArray.
 *
 * An ElementInfo is never a wrapper, so isWrapper is always false.
 */
function ElementInfo(fieldName, containerClass, closeFunc, isArray) {
  this.isWrapper = false;

  this.fieldName = fieldName;
  this.isArray = isArray;
  this.containerClass = containerClass;
  this.closeFunc = closeFunc;
}
michael@0 1030
/**
 * FeedElementInfo represents a feed element, usually the root.
 *
 * @param fieldName    Stored as this.fieldName.
 * @param feedVersion  Stored as this.feedVersion.
 *
 * A FeedElementInfo is never a wrapper, so isWrapper is always false.
 */
function FeedElementInfo(fieldName, feedVersion) {
  this.fieldName = fieldName;
  this.feedVersion = feedVersion;
  this.isWrapper = false;
}
michael@0 1039
/**
 * Some feed formats include vestigial wrapper elements that we don't
 * want to include in our object model, but we do need to keep track
 * of during parsing. A WrapperElementInfo marks such an element: its
 * isWrapper flag is always true.
 *
 * @param fieldName  Stored as this.fieldName.
 */
function WrapperElementInfo(fieldName) {
  this.fieldName = fieldName;
  this.isWrapper = true;
}
michael@0 1049
/***** The Processor *****/
function FeedProcessor() {
  // Small factories for the ElementInfo shapes that recur in the
  // transition table below. Each call builds a fresh ElementInfo.

  // A repeatable person element post-processed by rssAuthor.
  function rssPerson(fieldName) {
    return new ElementInfo(fieldName, Cc[PERSON_CONTRACTID], rssAuthor, true);
  }
  // A repeatable person element with no post-processing.
  function atomPerson(fieldName) {
    return new ElementInfo(fieldName, Cc[PERSON_CONTRACTID], null, true);
  }
  // A repeatable entry/item element.
  function entryList(fieldName) {
    return new ElementInfo(fieldName, Cc[ENTRY_CONTRACTID], null, true);
  }
  // A single generator element.
  function generatorInfo(closeFunc) {
    return new ElementInfo("generator", Cc[GENERATOR_CONTRACTID],
                           closeFunc, false);
  }
  // An element without a container class.
  function bare(fieldName, closeFunc, isArray) {
    return new ElementInfo(fieldName, null, closeFunc, isArray);
  }

  this._reader = Cc[SAX_CONTRACTID].createInstance(Ci.nsISAXXMLReader);
  this._buf = "";
  this._feed = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
  this._handlerStack = [];
  this._xmlBaseStack = []; // sparse array keyed to nesting depth
  this._depth = 0;
  this._state = "START";
  this._result = null;
  this._extensionHandler = null;
  this._xhtmlHandler = null;
  this._haveSentResult = false;

  // The nsIFeedResultListener waiting for the parse results
  this.listener = null;

  // These elements can contain (X)HTML or plain text.
  // We keep a table here that contains their default treatment
  this._textConstructs = {
    "atom:title": "text",
    "atom:summary": "text",
    "atom:rights": "text",
    "atom:content": "text",
    "atom:subtitle": "text",
    "description": "html",
    "rss1:description": "html",
    "dc:description": "html",
    "content:encoded": "html",
    "title": "text",
    "rss1:title": "text",
    "atom03:title": "text",
    "atom03:tagline": "text",
    "atom03:summary": "text",
    "atom03:content": "text"
  };
  this._stack = [];

  // The transition table: for each parser state, the elements that are
  // handled specially when encountered in that state.
  this._trans = {
    "START": {
      // If we hit a root RSS element, treat as RSS2.
      "rss": new FeedElementInfo("RSS2", "rss2"),

      // If we hit an RDF element, it could be RSS1, but we can't
      // verify that until we hit a rss1:channel element.
      "rdf:RDF": new WrapperElementInfo("RDF"),

      // If we hit a Atom 1.0 element, treat as Atom 1.0.
      "atom:feed": new FeedElementInfo("Atom", "atom"),

      // Treat as Atom 0.3
      "atom03:feed": new FeedElementInfo("Atom03", "atom03"),
    },

    /********* RSS2 **********/
    "IN_RSS2": {
      "channel": new WrapperElementInfo("channel")
    },

    "IN_CHANNEL": {
      "item": entryList("items"),
      "managingEditor": rssPerson("authors"),
      "dc:creator": rssPerson("authors"),
      "dc:author": rssPerson("authors"),
      "dc:contributor": rssPerson("contributors"),
      "category": bare("categories", rssCatTerm, true),
      "cloud": bare("cloud", null, false),
      "image": bare("image", null, false),
      "textInput": bare("textInput", null, false),
      "skipDays": bare("skipDays", null, false),
      "skipHours": bare("skipHours", null, false),
      "generator": generatorInfo(atomGenerator),
    },

    "IN_ITEMS": {
      "author": rssPerson("authors"),
      "dc:creator": rssPerson("authors"),
      "dc:author": rssPerson("authors"),
      "dc:contributor": rssPerson("contributors"),
      "category": bare("categories", rssCatTerm, true),
      "enclosure": bare("enclosure", null, false),
      "media:content": bare("mediacontent", null, true),
      "media:group": bare("mediagroup", null, false),
      "guid": bare("guid", rssGuid, false)
    },

    "IN_SKIPDAYS": {
      "day": bare("days", rssArrayElement, true)
    },

    "IN_SKIPHOURS": {
      "hour": bare("hours", rssArrayElement, true)
    },

    "IN_MEDIAGROUP": {
      "media:content": bare("mediacontent", null, true)
    },

    /********* RSS1 **********/
    "IN_RDF": {
      // If we hit a rss1:channel, we can verify that we have RSS1
      "rss1:channel": new FeedElementInfo("rdf_channel", "rss1"),
      "rss1:image": bare("image", null, false),
      "rss1:textinput": bare("textInput", null, false),
      "rss1:item": entryList("items"),
    },

    "IN_RDF_CHANNEL": {
      "admin:generatorAgent": generatorInfo(null),
      "dc:creator": rssPerson("authors"),
      "dc:author": rssPerson("authors"),
      "dc:contributor": rssPerson("contributors"),
    },

    /********* ATOM 1.0 **********/
    "IN_ATOM": {
      "atom:author": atomPerson("authors"),
      "atom:generator": generatorInfo(atomGenerator),
      "atom:contributor": atomPerson("contributors"),
      "atom:link": bare("links", null, true),
      "atom:logo": bare("atom:logo", atomLogo, false),
      "atom:entry": entryList("entries")
    },

    "IN_ENTRIES": {
      "atom:author": atomPerson("authors"),
      "atom:contributor": atomPerson("contributors"),
      "atom:link": bare("links", null, true),
    },

    /********* ATOM 0.3 **********/
    "IN_ATOM03": {
      "atom03:author": atomPerson("authors"),
      "atom03:contributor": atomPerson("contributors"),
      "atom03:link": bare("links", null, true),
      "atom03:entry": entryList("atom03_entries"),
      "atom03:generator": generatorInfo(atomGenerator),
    },

    "IN_ATOM03_ENTRIES": {
      "atom03:author": atomPerson("authors"),
      "atom03:contributor": atomPerson("contributors"),
      "atom03:link": bare("links", null, true),
      "atom03:entry": entryList("atom03_entries")
    }
  };
}
michael@0 1224
michael@0 1225 // See startElement for a long description of how feeds are processed.
michael@0 1226 FeedProcessor.prototype = {
michael@0 1227
michael@0 1228 // Set ourselves as the SAX handler, and set the base URI
michael@0 1229 _init: function FP_init(uri) {
michael@0 1230 this._reader.contentHandler = this;
michael@0 1231 this._reader.errorHandler = this;
michael@0 1232 this._result = Cc[FR_CONTRACTID].createInstance(Ci.nsIFeedResult);
michael@0 1233 if (uri) {
michael@0 1234 this._result.uri = uri;
michael@0 1235 this._reader.baseURI = uri;
michael@0 1236 this._xmlBaseStack[0] = uri;
michael@0 1237 }
michael@0 1238 },
michael@0 1239
michael@0 1240 // This function is called once we figure out what type of feed
michael@0 1241 // we're dealing with. Some feed types require digging a bit further
michael@0 1242 // than the root.
michael@0 1243 _docVerified: function FP_docVerified(version) {
michael@0 1244 this._result.doc = Cc[FEED_CONTRACTID].createInstance(Ci.nsIFeed);
michael@0 1245 this._result.doc.baseURI =
michael@0 1246 this._xmlBaseStack[this._xmlBaseStack.length - 1];
michael@0 1247 this._result.doc.fields = this._feed;
michael@0 1248 this._result.version = version;
michael@0 1249 },
michael@0 1250
michael@0 1251 // When we're done with the feed, let the listener know what
michael@0 1252 // happened.
michael@0 1253 _sendResult: function FP_sendResult() {
michael@0 1254 this._haveSentResult = true;
michael@0 1255 try {
michael@0 1256 // Can be null when a non-feed is fed to us
michael@0 1257 if (this._result.doc)
michael@0 1258 this._result.doc.normalize();
michael@0 1259 }
michael@0 1260 catch (e) {
michael@0 1261 LOG("FIXME: " + e);
michael@0 1262 }
michael@0 1263
michael@0 1264 try {
michael@0 1265 if (this.listener != null)
michael@0 1266 this.listener.handleResult(this._result);
michael@0 1267 }
michael@0 1268 finally {
michael@0 1269 this._result = null;
michael@0 1270 }
michael@0 1271 },
michael@0 1272
michael@0 1273 // Parsing functions
michael@0 1274 parseFromStream: function FP_parseFromStream(stream, uri) {
michael@0 1275 this._init(uri);
michael@0 1276 this._reader.parseFromStream(stream, null, stream.available(),
michael@0 1277 "application/xml");
michael@0 1278 this._reader = null;
michael@0 1279 },
michael@0 1280
michael@0 1281 parseFromString: function FP_parseFromString(inputString, uri) {
michael@0 1282 this._init(uri);
michael@0 1283 this._reader.parseFromString(inputString, "application/xml");
michael@0 1284 this._reader = null;
michael@0 1285 },
michael@0 1286
michael@0 1287 parseAsync: function FP_parseAsync(requestObserver, uri) {
michael@0 1288 this._init(uri);
michael@0 1289 this._reader.parseAsync(requestObserver);
michael@0 1290 },
michael@0 1291
michael@0 1292 // nsIStreamListener
michael@0 1293
michael@0 1294 // The XMLReader will throw sensible exceptions if these get called
michael@0 1295 // out of order.
michael@0 1296 onStartRequest: function FP_onStartRequest(request, context) {
michael@0 1297 // this will throw if the request is not a channel, but so will nsParser.
michael@0 1298 var channel = request.QueryInterface(Ci.nsIChannel);
michael@0 1299 channel.contentType = "application/vnd.mozilla.maybe.feed";
michael@0 1300 this._reader.onStartRequest(request, context);
michael@0 1301 },
michael@0 1302
michael@0 1303 onStopRequest: function FP_onStopRequest(request, context, statusCode) {
michael@0 1304 try {
michael@0 1305 this._reader.onStopRequest(request, context, statusCode);
michael@0 1306 }
michael@0 1307 finally {
michael@0 1308 this._reader = null;
michael@0 1309 }
michael@0 1310 },
michael@0 1311
michael@0 1312 onDataAvailable:
michael@0 1313 function FP_onDataAvailable(request, context, inputStream, offset, count) {
michael@0 1314 this._reader.onDataAvailable(request, context, inputStream, offset, count);
michael@0 1315 },
michael@0 1316
michael@0 1317 // nsISAXErrorHandler
michael@0 1318
michael@0 1319 // We only care about fatal errors. When this happens, we may have
michael@0 1320 // parsed through the feed metadata and some number of entries. The
michael@0 1321 // listener can still show some of that data if it wants, and we'll
michael@0 1322 // set the bozo bit to indicate we were unable to parse all the way
michael@0 1323 // through.
michael@0 1324 fatalError: function FP_reportError() {
michael@0 1325 this._result.bozo = true;
michael@0 1326 //XXX need to QI to FeedProgressListener
michael@0 1327 if (!this._haveSentResult)
michael@0 1328 this._sendResult();
michael@0 1329 },
michael@0 1330
michael@0 1331 // nsISAXContentHandler
michael@0 1332
michael@0 1333 startDocument: function FP_startDocument() {
michael@0 1334 //LOG("----------");
michael@0 1335 },
michael@0 1336
michael@0 1337 endDocument: function FP_endDocument() {
michael@0 1338 if (!this._haveSentResult)
michael@0 1339 this._sendResult();
michael@0 1340 },
michael@0 1341
michael@0 1342 // The transitions defined above identify elements that contain more
michael@0 1343 // than just text. For example RSS items contain many fields, and so
michael@0 1344 // do Atom authors. The only commonly used elements that contain
michael@0 1345 // mixed content are Atom Text Constructs of type="xhtml", which we
michael@0 1346 // delegate to another handler for cleaning. That leaves a couple
michael@0 1347 // different types of elements to deal with: those that should occur
michael@0 1348 // only once, such as title elements, and those that can occur
michael@0 1349 // multiple times, such as the RSS category element and the Atom
michael@0 1350 // link element. Most of the RSS1/DC elements can occur multiple
michael@0 1351 // times in theory, but in practice, the only ones that do have
michael@0 1352 // analogues in Atom.
michael@0 1353 //
michael@0 1354 // Some elements are also groups of attributes or sub-elements,
michael@0 1355 // while others are simple text fields. For the most part, we don't
michael@0 1356 // have to pay explicit attention to the simple text elements,
michael@0 1357 // unless we want to post-process the resulting string to transform
michael@0 1358 // it into some richer object like a Date or URI.
michael@0 1359 //
michael@0 1360 // Elements that have more sophisticated content models still end up
michael@0 1361 // being dictionaries, whether they are based on attributes like RSS
michael@0 1362 // cloud, sub-elements like Atom author, or even items and
michael@0 1363 // entries. These elements are treated as "containers". It's
michael@0 1364 // theoretically possible for a container to have an attribute with
michael@0 1365 // the same universal name as a sub-element, but none of the feed
michael@0 1366 // formats allow this by default, and I don't know of any extension that
michael@0 1367 // works this way.
michael@0 1368 //
michael@0 1369 startElement: function FP_startElement(uri, localName, qName, attributes) {
michael@0 1370 this._buf = "";
michael@0 1371 ++this._depth;
michael@0 1372 var elementInfo;
michael@0 1373
michael@0 1374 //LOG("<" + localName + ">");
michael@0 1375
michael@0 1376 // Check for xml:base
michael@0 1377 var base = attributes.getValueFromName(XMLNS, "base");
michael@0 1378 if (base) {
michael@0 1379 this._xmlBaseStack[this._depth] =
michael@0 1380 strToURI(base, this._xmlBaseStack[this._xmlBaseStack.length - 1]);
michael@0 1381 }
michael@0 1382
michael@0 1383 // To identify the element we're dealing with, we look up the
michael@0 1384 // namespace URI in our gNamespaces dictionary, which will give us
michael@0 1385 // a "canonical" prefix for a namespace URI. For example, this
michael@0 1386 // allows Dublin Core "creator" elements to be consistently mapped
michael@0 1387 // to "dc:creator", for easy field access by consumer code. This
michael@0 1388 // strategy also happens to shorten up our state table.
michael@0 1389 var key = this._prefixForNS(uri) + localName;
michael@0 1390
michael@0 1391 // Check to see if we need to hand this off to our XHTML handler.
michael@0 1392 // The elements we're dealing with will look like this:
michael@0 1393 //
michael@0 1394 // <title type="xhtml">
michael@0 1395 // <div xmlns="http://www.w3.org/1999/xhtml">
michael@0 1396 // A title with <b>bold</b> and <i>italics</i>.
michael@0 1397 // </div>
michael@0 1398 // </title>
michael@0 1399 //
michael@0 1400 // When it returns in returnFromXHTMLHandler, the handler should
michael@0 1401 // give us back a string like this:
michael@0 1402 //
michael@0 1403 // "A title with <b>bold</b> and <i>italics</i>."
michael@0 1404 //
michael@0 1405 // The Atom spec explicitly says the div is not part of the content,
michael@0 1406 // and explicitly allows whitespace collapsing.
michael@0 1407 //
michael@0 1408 if ((this._result.version == "atom" || this._result.version == "atom03") &&
michael@0 1409 this._textConstructs[key] != null) {
michael@0 1410 var type = attributes.getValueFromName("","type");
michael@0 1411 if (type != null && type.indexOf("xhtml") >= 0) {
michael@0 1412 this._xhtmlHandler =
michael@0 1413 new XHTMLHandler(this, (this._result.version == "atom"));
michael@0 1414 this._reader.contentHandler = this._xhtmlHandler;
michael@0 1415 return;
michael@0 1416 }
michael@0 1417 }
michael@0 1418
michael@0 1419 // Check our current state, and see if that state has a defined
michael@0 1420 // transition. For example, this._trans["atom:entry"]["atom:author"]
michael@0 1421 // will have one, and it tells us to add an item to our authors array.
michael@0 1422 if (this._trans[this._state] && this._trans[this._state][key]) {
michael@0 1423 elementInfo = this._trans[this._state][key];
michael@0 1424 }
michael@0 1425 else {
michael@0 1426 // If we don't have a transition, hand off to extension handler
michael@0 1427 this._extensionHandler = new ExtensionHandler(this);
michael@0 1428 this._reader.contentHandler = this._extensionHandler;
michael@0 1429 this._extensionHandler.startElement(uri, localName, qName, attributes);
michael@0 1430 return;
michael@0 1431 }
michael@0 1432
michael@0 1433 // This distinguishes wrappers like 'channel' from elements
michael@0 1434 // we'd actually like to do something with (which will test true).
michael@0 1435 this._handlerStack[this._depth] = elementInfo;
michael@0 1436 if (elementInfo.isWrapper) {
michael@0 1437 this._state = "IN_" + elementInfo.fieldName.toUpperCase();
michael@0 1438 this._stack.push([this._feed, this._state]);
michael@0 1439 }
michael@0 1440 else if (elementInfo.feedVersion) {
michael@0 1441 this._state = "IN_" + elementInfo.fieldName.toUpperCase();
michael@0 1442
michael@0 1443 // Check for the older RSS2 variants
michael@0 1444 if (elementInfo.feedVersion == "rss2")
michael@0 1445 elementInfo.feedVersion = this._findRSSVersion(attributes);
michael@0 1446 else if (uri == RSS090NS)
michael@0 1447 elementInfo.feedVersion = "rss090";
michael@0 1448
michael@0 1449 this._docVerified(elementInfo.feedVersion);
michael@0 1450 this._stack.push([this._feed, this._state]);
michael@0 1451 this._mapAttributes(this._feed, attributes);
michael@0 1452 }
michael@0 1453 else {
michael@0 1454 this._state = this._processComplexElement(elementInfo, attributes);
michael@0 1455 }
michael@0 1456 },
michael@0 1457
michael@0 1458 // In the endElement handler, we decrement the stack and look
michael@0 1459 // for cleanup/transition functions to execute. The second part
michael@0 1460 // of the state transition works as above in startElement, but
michael@0 1461 // the state we're looking for is prefixed with an underscore
michael@0 1462 // to distinguish endElement events from startElement events.
michael@0 1463 endElement: function FP_endElement(uri, localName, qName) {
michael@0 1464 var elementInfo = this._handlerStack[this._depth];
michael@0 1465 //LOG("</" + localName + ">");
michael@0 1466 if (elementInfo && !elementInfo.isWrapper)
michael@0 1467 this._closeComplexElement(elementInfo);
michael@0 1468
michael@0 1469 // cut down xml:base context
michael@0 1470 if (this._xmlBaseStack.length == this._depth + 1)
michael@0 1471 this._xmlBaseStack = this._xmlBaseStack.slice(0, this._depth);
michael@0 1472
michael@0 1473 // our new state is whatever is at the top of the stack now
michael@0 1474 if (this._stack.length > 0)
michael@0 1475 this._state = this._stack[this._stack.length - 1][1];
michael@0 1476 this._handlerStack = this._handlerStack.slice(0, this._depth);
michael@0 1477 --this._depth;
michael@0 1478 },
michael@0 1479
michael@0 1480 // Buffer up character data. The buffer is cleared with every
michael@0 1481 // opening element.
michael@0 1482 characters: function FP_characters(data) {
michael@0 1483 this._buf += data;
michael@0 1484 },
michael@0 1485 // TODO: It would be nice to check new prefixes here, and if they
michael@0 1486 // don't conflict with the ones we've defined, throw them in a
michael@0 1487 // dictionary to check.
michael@0 1488 startPrefixMapping: function FP_startPrefixMapping(prefix, uri) {
michael@0 1489 },
michael@0 1490
michael@0 1491 endPrefixMapping: function FP_endPrefixMapping(prefix) {
michael@0 1492 },
michael@0 1493
michael@0 1494 processingInstruction: function FP_processingInstruction(target, data) {
michael@0 1495 if (target == "xml-stylesheet") {
michael@0 1496 var hrefAttribute = data.match(/href=[\"\'](.*?)[\"\']/);
michael@0 1497 if (hrefAttribute && hrefAttribute.length == 2)
michael@0 1498 this._result.stylesheet = strToURI(hrefAttribute[1], this._result.uri);
michael@0 1499 }
michael@0 1500 },
michael@0 1501
michael@0 1502 // end of nsISAXContentHandler
michael@0 1503
michael@0 1504 // Handle our more complicated elements--those that contain
michael@0 1505 // attributes and child elements.
michael@0 1506 _processComplexElement:
michael@0 1507 function FP__processComplexElement(elementInfo, attributes) {
michael@0 1508 var obj, key, prefix;
michael@0 1509
michael@0 1510 // If the container is an entry/item, it'll need to have its
michael@0 1511 // more esoteric properties put in the 'fields' property bag.
michael@0 1512 if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID]) {
michael@0 1513 obj = elementInfo.containerClass.createInstance(Ci.nsIFeedEntry);
michael@0 1514 obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1];
michael@0 1515 this._mapAttributes(obj.fields, attributes);
michael@0 1516 }
michael@0 1517 else if (elementInfo.containerClass) {
michael@0 1518 obj = elementInfo.containerClass.createInstance(Ci.nsIFeedElementBase);
michael@0 1519 obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1];
michael@0 1520 obj.attributes = attributes; // just set the SAX attributes
michael@0 1521 }
michael@0 1522 else {
michael@0 1523 obj = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2);
michael@0 1524 this._mapAttributes(obj, attributes);
michael@0 1525 }
michael@0 1526
michael@0 1527 // We should have a container/propertyBag that's had its
michael@0 1528 // attributes processed. Now we need to attach it to its
michael@0 1529 // container.
michael@0 1530 var newProp;
michael@0 1531
michael@0 1532 // First we'll see what's on top of the stack.
michael@0 1533 var container = this._stack[this._stack.length - 1][0];
michael@0 1534
michael@0 1535 // Check to see if it has the property
michael@0 1536 var prop;
michael@0 1537 try {
michael@0 1538 prop = container.getProperty(elementInfo.fieldName);
michael@0 1539 }
michael@0 1540 catch(e) {
michael@0 1541 }
michael@0 1542
michael@0 1543 if (elementInfo.isArray) {
michael@0 1544 if (!prop) {
michael@0 1545 container.setPropertyAsInterface(elementInfo.fieldName,
michael@0 1546 Cc[ARRAY_CONTRACTID].
michael@0 1547 createInstance(Ci.nsIMutableArray));
michael@0 1548 }
michael@0 1549
michael@0 1550 newProp = container.getProperty(elementInfo.fieldName);
michael@0 1551 // XXX This QI should not be necessary, but XPConnect seems to fly
michael@0 1552 // off the handle in the browser, and loses track of the interface
michael@0 1553 // on large files. Bug 335638.
michael@0 1554 newProp.QueryInterface(Ci.nsIMutableArray);
michael@0 1555 newProp.appendElement(obj,false);
michael@0 1556
michael@0 1557 // If new object is an nsIFeedContainer, we want to deal with
michael@0 1558 // its member nsIPropertyBag instead.
michael@0 1559 if (isIFeedContainer(obj))
michael@0 1560 newProp = obj.fields;
michael@0 1561
michael@0 1562 }
michael@0 1563 else {
michael@0 1564 // If it doesn't, set it.
michael@0 1565 if (!prop) {
michael@0 1566 container.setPropertyAsInterface(elementInfo.fieldName,obj);
michael@0 1567 }
michael@0 1568 newProp = container.getProperty(elementInfo.fieldName);
michael@0 1569 }
michael@0 1570
michael@0 1571 // make our new state name, and push the property onto the stack
michael@0 1572 var newState = "IN_" + elementInfo.fieldName.toUpperCase();
michael@0 1573 this._stack.push([newProp, newState, obj]);
michael@0 1574 return newState;
michael@0 1575 },
michael@0 1576
michael@0 1577 // Sometimes we need to reconcile the element content with the object
michael@0 1578 // model for a given feed. We use helper functions to do the
michael@0 1579 // munging, but we need to identify array types here, so the munging
michael@0 1580 // happens only to the last element of an array.
michael@0 1581 _closeComplexElement: function FP__closeComplexElement(elementInfo) {
michael@0 1582 var stateTuple = this._stack.pop();
michael@0 1583 var container = stateTuple[0];
michael@0 1584 var containerParent = stateTuple[2];
michael@0 1585 var element = null;
michael@0 1586 var isArray = isIArray(container);
michael@0 1587
michael@0 1588 // If it's an array and we have to post-process,
michael@0 1589 // grab the last element
michael@0 1590 if (isArray)
michael@0 1591 element = container.queryElementAt(container.length - 1, Ci.nsISupports);
michael@0 1592 else
michael@0 1593 element = container;
michael@0 1594
michael@0 1595 // Run the post-processing function if there is one.
michael@0 1596 if (elementInfo.closeFunc)
michael@0 1597 element = elementInfo.closeFunc(this._buf, element);
michael@0 1598
michael@0 1599 // If an nsIFeedContainer was on top of the stack,
michael@0 1600 // we need to normalize it
michael@0 1601 if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID])
michael@0 1602 containerParent.normalize();
michael@0 1603
michael@0 1604 // If it's an array, re-set the last element
michael@0 1605 if (isArray)
michael@0 1606 container.replaceElementAt(element, container.length - 1, false);
michael@0 1607 },
michael@0 1608
michael@0 1609 _prefixForNS: function FP_prefixForNS(uri) {
michael@0 1610 if (!uri)
michael@0 1611 return "";
michael@0 1612 var prefix = gNamespaces[uri];
michael@0 1613 if (prefix)
michael@0 1614 return prefix + ":";
michael@0 1615 if (uri.toLowerCase().indexOf("http://backend.userland.com") == 0)
michael@0 1616 return "";
michael@0 1617 else
michael@0 1618 return null;
michael@0 1619 },
michael@0 1620
michael@0 1621 _mapAttributes: function FP__mapAttributes(bag, attributes) {
michael@0 1622 // Cycle through the attributes, and set our properties using the
michael@0 1623 // prefix:localNames we find in our namespace dictionary.
michael@0 1624 for (var i = 0; i < attributes.length; ++i) {
michael@0 1625 var key = this._prefixForNS(attributes.getURI(i)) + attributes.getLocalName(i);
michael@0 1626 var val = attributes.getValue(i);
michael@0 1627 bag.setPropertyAsAString(key, val);
michael@0 1628 }
michael@0 1629 },
michael@0 1630
michael@0 1631 // Only for RSS2esque formats
michael@0 1632 _findRSSVersion: function FP__findRSSVersion(attributes) {
michael@0 1633 var versionAttr = attributes.getValueFromName("", "version").trim();
michael@0 1634 var versions = { "0.91":"rss091",
michael@0 1635 "0.92":"rss092",
michael@0 1636 "0.93":"rss093",
michael@0 1637 "0.94":"rss094" }
michael@0 1638 if (versions[versionAttr])
michael@0 1639 return versions[versionAttr];
michael@0 1640 if (versionAttr.substr(0,2) != "2.")
michael@0 1641 return "rssUnknown";
michael@0 1642 return "rss2";
michael@0 1643 },
michael@0 1644
michael@0 1645 // unknown element values are returned here. See startElement above
michael@0 1646 // for how this works.
michael@0 1647 returnFromExtHandler:
michael@0 1648 function FP_returnExt(uri, localName, chars, attributes) {
michael@0 1649 --this._depth;
michael@0 1650
michael@0 1651 // take control of the SAX events
michael@0 1652 this._reader.contentHandler = this;
michael@0 1653 if (localName == null && chars == null)
michael@0 1654 return;
michael@0 1655
michael@0 1656 // we don't take random elements inside rdf:RDF
michael@0 1657 if (this._state == "IN_RDF")
michael@0 1658 return;
michael@0 1659
michael@0 1660 // Grab the top of the stack
michael@0 1661 var top = this._stack[this._stack.length - 1];
michael@0 1662 if (!top)
michael@0 1663 return;
michael@0 1664
michael@0 1665 var container = top[0];
michael@0 1666 // Grab the last element if it's an array
michael@0 1667 if (isIArray(container)) {
michael@0 1668 var contract = this._handlerStack[this._depth].containerClass;
michael@0 1669 // check if it's something specific, but not an entry
michael@0 1670 if (contract && contract != Cc[ENTRY_CONTRACTID]) {
michael@0 1671 var el = container.queryElementAt(container.length - 1,
michael@0 1672 Ci.nsIFeedElementBase);
michael@0 1673 // XXX there must be a way to flatten these interfaces
michael@0 1674 if (contract == Cc[PERSON_CONTRACTID])
michael@0 1675 el.QueryInterface(Ci.nsIFeedPerson);
michael@0 1676 else
michael@0 1677 return; // don't know about this interface
michael@0 1678
michael@0 1679 var propName = localName;
michael@0 1680 var prefix = gNamespaces[uri];
michael@0 1681
michael@0 1682 // synonyms
michael@0 1683 if ((uri == "" ||
michael@0 1684 prefix &&
michael@0 1685 ((prefix.indexOf("atom") > -1) ||
michael@0 1686 (prefix.indexOf("rss") > -1))) &&
michael@0 1687 (propName == "url" || propName == "href"))
michael@0 1688 propName = "uri";
michael@0 1689
michael@0 1690 try {
michael@0 1691 if (el[propName] !== "undefined") {
michael@0 1692 var propValue = chars;
michael@0 1693 // convert URI-bearing values to an nsIURI
michael@0 1694 if (propName == "uri") {
michael@0 1695 var base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
michael@0 1696 propValue = strToURI(chars, base);
michael@0 1697 }
michael@0 1698 el[propName] = propValue;
michael@0 1699 }
michael@0 1700 }
michael@0 1701 catch(e) {
michael@0 1702 // ignore XPConnect errors
michael@0 1703 }
michael@0 1704 // the rest of the function deals with entry- and feed-level stuff
michael@0 1705 return;
michael@0 1706 }
michael@0 1707 else {
michael@0 1708 container = container.queryElementAt(container.length - 1,
michael@0 1709 Ci.nsIWritablePropertyBag2);
michael@0 1710 }
michael@0 1711 }
michael@0 1712
michael@0 1713 // Make the buffer our new property
michael@0 1714 var propName = this._prefixForNS(uri) + localName;
michael@0 1715
michael@0 1716 // But, it could be something containing HTML. If so,
michael@0 1717 // we need to know about that.
michael@0 1718 if (this._textConstructs[propName] != null &&
michael@0 1719 this._handlerStack[this._depth].containerClass !== null) {
michael@0 1720 var newProp = Cc[TEXTCONSTRUCT_CONTRACTID].
michael@0 1721 createInstance(Ci.nsIFeedTextConstruct);
michael@0 1722 newProp.text = chars;
michael@0 1723 // Look up the default type in our table
michael@0 1724 var type = this._textConstructs[propName];
michael@0 1725 var typeAttribute = attributes.getValueFromName("","type");
michael@0 1726 if (this._result.version == "atom" && typeAttribute != null) {
michael@0 1727 type = typeAttribute;
michael@0 1728 }
michael@0 1729 else if (this._result.version == "atom03" && typeAttribute != null) {
michael@0 1730 if (typeAttribute.toLowerCase().indexOf("xhtml") >= 0) {
michael@0 1731 type = "xhtml";
michael@0 1732 }
michael@0 1733 else if (typeAttribute.toLowerCase().indexOf("html") >= 0) {
michael@0 1734 type = "html";
michael@0 1735 }
michael@0 1736 else if (typeAttribute.toLowerCase().indexOf("text") >= 0) {
michael@0 1737 type = "text";
michael@0 1738 }
michael@0 1739 }
michael@0 1740
michael@0 1741 // If it's rss feed-level description, it's not supposed to have html
michael@0 1742 if (this._result.version.indexOf("rss") >= 0 &&
michael@0 1743 this._handlerStack[this._depth].containerClass != ENTRY_CONTRACTID) {
michael@0 1744 type = "text";
michael@0 1745 }
michael@0 1746 newProp.type = type;
michael@0 1747 newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
michael@0 1748 container.setPropertyAsInterface(propName, newProp);
michael@0 1749 }
michael@0 1750 else {
michael@0 1751 container.setPropertyAsAString(propName, chars);
michael@0 1752 }
michael@0 1753 },
michael@0 1754
michael@0 1755 // Sometimes, we'll hand off SAX handling duties to an XHTMLHandler
michael@0 1756 // (see above) that will scrape out non-XHTML stuff, normalize
michael@0 1757 // namespaces, and remove the wrapper div from Atom 1.0. When the
michael@0 1758 // XHTMLHandler is done, it'll callback here.
michael@0 1759 returnFromXHTMLHandler:
michael@0 1760 function FP_returnFromXHTMLHandler(chars, uri, localName, qName) {
michael@0 1761 // retake control of the SAX content events
michael@0 1762 this._reader.contentHandler = this;
michael@0 1763
michael@0 1764 // Grab the top of the stack
michael@0 1765 var top = this._stack[this._stack.length - 1];
michael@0 1766 if (!top)
michael@0 1767 return;
michael@0 1768 var container = top[0];
michael@0 1769
michael@0 1770 // Assign the property
michael@0 1771 var newProp = newProp = Cc[TEXTCONSTRUCT_CONTRACTID].
michael@0 1772 createInstance(Ci.nsIFeedTextConstruct);
michael@0 1773 newProp.text = chars;
michael@0 1774 newProp.type = "xhtml";
michael@0 1775 newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1];
michael@0 1776 container.setPropertyAsInterface(this._prefixForNS(uri) + localName,
michael@0 1777 newProp);
michael@0 1778
michael@0 1779 // XHTML will cause us to peek too far. The XHTML handler will
michael@0 1780 // send us an end element to call. RFC4287-valid feeds allow a
michael@0 1781 // more graceful way to handle this. Unfortunately, we can't count
michael@0 1782 // on compliance at this point.
michael@0 1783 this.endElement(uri, localName, qName);
michael@0 1784 },
michael@0 1785
michael@0 1786 // XPCOM stuff
michael@0 1787 classID: FP_CLASSID,
michael@0 1788 QueryInterface: XPCOMUtils.generateQI(
michael@0 1789 [Ci.nsIFeedProcessor, Ci.nsISAXContentHandler, Ci.nsISAXErrorHandler,
michael@0 1790 Ci.nsIStreamListener, Ci.nsIRequestObserver]
michael@0 1791 )
michael@0 1792 }
michael@0 1793
// Every component constructor implemented in this file; the factory
// entry point below is generated from this list.
var components = [
  FeedProcessor,
  FeedResult,
  Feed,
  Entry,
  TextConstruct,
  Generator,
  Person
];

this.NSGetFactory = XPCOMUtils.generateNSGetFactory(components);

mercurial