mobile/android/chrome/content/JSDOMParser.js

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
michael@0 3 * You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4
michael@0 5 /**
michael@0 6 * This is a relatively lightweight DOMParser that is safe to use in a web
michael@0 7 * worker. This is far from a complete DOM implementation; however, it should
michael@0 8 * contain the minimal set of functionality necessary for Readability.js.
michael@0 9 *
michael@0 10 * Aside from not implementing the full DOM API, there are other quirks to be
michael@0 11 * aware of when using the JSDOMParser:
michael@0 12 *
michael@0 13 * 1) Properly formed HTML/XML must be used. This means you should be extra
michael@0 14 * careful when using this parser on anything received directly from an
michael@0 15 * XMLHttpRequest. Providing a serialized string from an XMLSerializer,
michael@0 16 * however, should be safe (since the browser's XMLSerializer should
michael@0 17 * generate valid HTML/XML). Therefore, if parsing a document from an XHR,
michael@0 18 * the recommended approach is to do the XHR in the main thread, use
michael@0 19 * XMLSerializer.serializeToString() on the responseXML, and pass the
michael@0 20 * resulting string to the worker.
michael@0 21 *
michael@0 22 * 2) Live NodeLists are not supported. DOM methods and properties such as
michael@0 23 * getElementsByTagName() and childNodes return standard arrays. If you
michael@0 24 * want these lists to be updated when nodes are removed or added to the
michael@0 25 * document, you must take care to manually update them yourself.
michael@0 26 */
michael@0 27 (function (global) {
michael@0 28
/**
 * Reports a parser problem through the debug output channel.
 *
 * @param m  human-readable description of what went wrong
 */
function error(m) {
  // Terminate the line explicitly so that consecutive reports do not run
  // together in the output.
  dump("JSDOMParser error: " + m + "\n");
}
michael@0 32
// Lookup table from the camelCase property names used on a JS style object
// to the hyphenated names used inside a CSS style attribute string.  Note
// that "float" is exposed to JS as cssFloat.
let styleMap = {
  alignmentBaseline: "alignment-baseline",
  background: "background",
  backgroundAttachment: "background-attachment",
  backgroundClip: "background-clip",
  backgroundColor: "background-color",
  backgroundImage: "background-image",
  backgroundOrigin: "background-origin",
  backgroundPosition: "background-position",
  backgroundPositionX: "background-position-x",
  backgroundPositionY: "background-position-y",
  backgroundRepeat: "background-repeat",
  backgroundRepeatX: "background-repeat-x",
  backgroundRepeatY: "background-repeat-y",
  backgroundSize: "background-size",
  baselineShift: "baseline-shift",
  border: "border",
  borderBottom: "border-bottom",
  borderBottomColor: "border-bottom-color",
  borderBottomLeftRadius: "border-bottom-left-radius",
  borderBottomRightRadius: "border-bottom-right-radius",
  borderBottomStyle: "border-bottom-style",
  borderBottomWidth: "border-bottom-width",
  borderCollapse: "border-collapse",
  borderColor: "border-color",
  borderImage: "border-image",
  borderImageOutset: "border-image-outset",
  borderImageRepeat: "border-image-repeat",
  borderImageSlice: "border-image-slice",
  borderImageSource: "border-image-source",
  borderImageWidth: "border-image-width",
  borderLeft: "border-left",
  borderLeftColor: "border-left-color",
  borderLeftStyle: "border-left-style",
  borderLeftWidth: "border-left-width",
  borderRadius: "border-radius",
  borderRight: "border-right",
  borderRightColor: "border-right-color",
  borderRightStyle: "border-right-style",
  borderRightWidth: "border-right-width",
  borderSpacing: "border-spacing",
  borderStyle: "border-style",
  borderTop: "border-top",
  borderTopColor: "border-top-color",
  borderTopLeftRadius: "border-top-left-radius",
  borderTopRightRadius: "border-top-right-radius",
  borderTopStyle: "border-top-style",
  borderTopWidth: "border-top-width",
  borderWidth: "border-width",
  bottom: "bottom",
  boxShadow: "box-shadow",
  boxSizing: "box-sizing",
  captionSide: "caption-side",
  clear: "clear",
  clip: "clip",
  clipPath: "clip-path",
  clipRule: "clip-rule",
  color: "color",
  colorInterpolation: "color-interpolation",
  colorInterpolationFilters: "color-interpolation-filters",
  colorProfile: "color-profile",
  colorRendering: "color-rendering",
  content: "content",
  counterIncrement: "counter-increment",
  counterReset: "counter-reset",
  cursor: "cursor",
  direction: "direction",
  display: "display",
  dominantBaseline: "dominant-baseline",
  emptyCells: "empty-cells",
  enableBackground: "enable-background",
  fill: "fill",
  fillOpacity: "fill-opacity",
  fillRule: "fill-rule",
  filter: "filter",
  cssFloat: "float",
  floodColor: "flood-color",
  floodOpacity: "flood-opacity",
  font: "font",
  fontFamily: "font-family",
  fontSize: "font-size",
  fontStretch: "font-stretch",
  fontStyle: "font-style",
  fontVariant: "font-variant",
  fontWeight: "font-weight",
  glyphOrientationHorizontal: "glyph-orientation-horizontal",
  glyphOrientationVertical: "glyph-orientation-vertical",
  height: "height",
  imageRendering: "image-rendering",
  kerning: "kerning",
  left: "left",
  letterSpacing: "letter-spacing",
  lightingColor: "lighting-color",
  lineHeight: "line-height",
  listStyle: "list-style",
  listStyleImage: "list-style-image",
  listStylePosition: "list-style-position",
  listStyleType: "list-style-type",
  margin: "margin",
  marginBottom: "margin-bottom",
  marginLeft: "margin-left",
  marginRight: "margin-right",
  marginTop: "margin-top",
  marker: "marker",
  markerEnd: "marker-end",
  markerMid: "marker-mid",
  markerStart: "marker-start",
  mask: "mask",
  maxHeight: "max-height",
  maxWidth: "max-width",
  minHeight: "min-height",
  minWidth: "min-width",
  opacity: "opacity",
  orphans: "orphans",
  outline: "outline",
  outlineColor: "outline-color",
  outlineOffset: "outline-offset",
  outlineStyle: "outline-style",
  outlineWidth: "outline-width",
  overflow: "overflow",
  overflowX: "overflow-x",
  overflowY: "overflow-y",
  padding: "padding",
  paddingBottom: "padding-bottom",
  paddingLeft: "padding-left",
  paddingRight: "padding-right",
  paddingTop: "padding-top",
  page: "page",
  pageBreakAfter: "page-break-after",
  pageBreakBefore: "page-break-before",
  pageBreakInside: "page-break-inside",
  pointerEvents: "pointer-events",
  position: "position",
  quotes: "quotes",
  resize: "resize",
  right: "right",
  shapeRendering: "shape-rendering",
  size: "size",
  speak: "speak",
  src: "src",
  stopColor: "stop-color",
  stopOpacity: "stop-opacity",
  stroke: "stroke",
  strokeDasharray: "stroke-dasharray",
  strokeDashoffset: "stroke-dashoffset",
  strokeLinecap: "stroke-linecap",
  strokeLinejoin: "stroke-linejoin",
  strokeMiterlimit: "stroke-miterlimit",
  strokeOpacity: "stroke-opacity",
  strokeWidth: "stroke-width",
  tableLayout: "table-layout",
  textAlign: "text-align",
  textAnchor: "text-anchor",
  textDecoration: "text-decoration",
  textIndent: "text-indent",
  textLineThrough: "text-line-through",
  textLineThroughColor: "text-line-through-color",
  textLineThroughMode: "text-line-through-mode",
  textLineThroughStyle: "text-line-through-style",
  textLineThroughWidth: "text-line-through-width",
  textOverflow: "text-overflow",
  textOverline: "text-overline",
  textOverlineColor: "text-overline-color",
  textOverlineMode: "text-overline-mode",
  textOverlineStyle: "text-overline-style",
  textOverlineWidth: "text-overline-width",
  textRendering: "text-rendering",
  textShadow: "text-shadow",
  textTransform: "text-transform",
  textUnderline: "text-underline",
  textUnderlineColor: "text-underline-color",
  textUnderlineMode: "text-underline-mode",
  textUnderlineStyle: "text-underline-style",
  textUnderlineWidth: "text-underline-width",
  top: "top",
  unicodeBidi: "unicode-bidi",
  unicodeRange: "unicode-range",
  vectorEffect: "vector-effect",
  verticalAlign: "vertical-align",
  visibility: "visibility",
  whiteSpace: "white-space",
  widows: "widows",
  width: "width",
  wordBreak: "word-break",
  wordSpacing: "word-spacing",
  wordWrap: "word-wrap",
  writingMode: "writing-mode",
  zIndex: "z-index",
  zoom: "zoom",
};
michael@0 224
// Void elements: elements that never have children and are serialized and
// parsed without a closing tag.  Membership is tested with the `in`
// operator, so the table has no prototype; otherwise tag names such as
// "constructor" would be reported as void through Object.prototype.
let voidElems = {
  __proto__: null,
  "area": true,
  "base": true,
  "br": true,
  "col": true,
  "command": true,
  "embed": true,
  "hr": true,
  "img": true,
  "input": true,
  "keygen": true,
  "link": true,
  "meta": true,
  "param": true,
  "source": true,
  "track": true,
  "wbr": true,
};
michael@0 241
// Numeric nodeType codes, numbered consecutively from 1 in the order listed.
// See http://www.w3schools.com/dom/dom_nodetype.asp
let nodeTypes = {};
[
  "ELEMENT_NODE",
  "ATTRIBUTE_NODE",
  "TEXT_NODE",
  "CDATA_SECTION_NODE",
  "ENTITY_REFERENCE_NODE",
  "ENTITY_NODE",
  "PROCESSING_INSTRUCTION_NODE",
  "COMMENT_NODE",
  "DOCUMENT_NODE",
  "DOCUMENT_TYPE_NODE",
  "DOCUMENT_FRAGMENT_NODE",
  "NOTATION_NODE"
].forEach(function (typeName, position) {
  nodeTypes[typeName] = position + 1;
});
michael@0 257
/**
 * Collects every descendant element of `this` whose tag name matches `tag`,
 * in document order.  Intended to be installed as a method: the search root
 * is the receiver.
 *
 * @param tag  tag name to look for (compared upper-cased), or "*" for all
 *             elements
 * @returns a plain array (not a live NodeList) of the matching elements
 */
function getElementsByTagName(tag) {
  let wanted = tag.toUpperCase();
  let matchAll = (wanted === "*");
  let matches = [];
  let pending = [];

  // Queue the element children of `parent` so that the first child is the
  // next one popped; non-element children (and their subtrees) are skipped.
  function queueElementChildren(parent) {
    let kids = parent.childNodes;
    for (let k = kids.length - 1; k >= 0; k--) {
      if (kids[k].nodeType === 1)
        pending.push(kids[k]);
    }
  }

  queueElementChildren(this);
  while (pending.length > 0) {
    let current = pending.pop();
    if (matchAll || current.tagName === wanted)
      matches.push(current);
    queueElementChildren(current);
  }
  return matches;
}
michael@0 276
/**
 * Base type shared by all node kinds.  The constructor sets nothing up;
 * concrete node types are expected to give each instance its own
 * `childNodes` array before the tree methods below are used.
 */
let Node = function () {};

Node.prototype = {
  attributes: null,
  childNodes: null,
  localName: null,
  nodeName: null,
  parentNode: null,
  textContent: null,

  /**
   * The first child of this node, or null when there is none.
   */
  get firstChild() {
    return this.childNodes[0] || null;
  },

  /**
   * The node following this one in its parent's child list, or null when
   * this node is detached or is the last child.
   */
  get nextSibling() {
    if (!this.parentNode)
      return null;

    let siblings = this.parentNode.childNodes;
    let index = siblings.indexOf(this);
    // A parent pointer that is out of sync with the parent's child list must
    // not make index + 1 wrap around to the first child.
    if (index === -1)
      return null;
    return siblings[index + 1] || null;
  },

  /**
   * Moves `child` to the end of this node's child list, detaching it from
   * its current parent first.
   *
   * @returns the appended child
   */
  appendChild: function (child) {
    if (child.parentNode) {
      child.parentNode.removeChild(child);
    }

    this.childNodes.push(child);
    child.parentNode = this;
    return child;
  },

  /**
   * Detaches `child` from this node.
   *
   * @returns the removed child
   * @throws the string "removeChild: node not found" when `child` is not a
   *         child of this node
   */
  removeChild: function (child) {
    let childNodes = this.childNodes;
    let childIndex = childNodes.indexOf(child);
    if (childIndex === -1) {
      throw "removeChild: node not found";
    }

    child.parentNode = null;
    return childNodes.splice(childIndex, 1)[0];
  },

  /**
   * Puts `newNode` in the position currently held by `oldNode`.
   *
   * @returns the replaced node (`oldNode`)
   * @throws the string "replaceChild: node not found" when `oldNode` is not
   *         a child of this node
   */
  replaceChild: function (newNode, oldNode) {
    let childNodes = this.childNodes;
    if (childNodes.indexOf(oldNode) === -1) {
      throw "replaceChild: node not found";
    }

    // Replacing a node with itself is a no-op; going through the detach /
    // re-attach path would leave it in the list with a null parent.
    if (newNode === oldNode)
      return oldNode;

    if (newNode.parentNode)
      newNode.parentNode.removeChild(newNode);

    // Look the position up only now: if newNode was a sibling of oldNode,
    // detaching it above shifted the indices.
    let childIndex = childNodes.indexOf(oldNode);
    childNodes[childIndex] = newNode;
    newNode.parentNode = this;
    oldNode.parentNode = null;
    return oldNode;
  }
};
michael@0 336
// Expose every nodeType constant both on Node instances (through the
// prototype) and on the Node constructor itself.
Object.keys(nodeTypes).forEach(function (typeName) {
  let code = nodeTypes[typeName];
  Node.prototype[typeName] = code;
  Node[typeName] = code;
});
michael@0 340
/**
 * A single element attribute, stored as a plain name/value pair.
 *
 * @param name   attribute name
 * @param value  attribute value
 */
function Attribute(name, value) {
  this.name = name;
  this.value = value;
}
michael@0 345
/**
 * Comment node.  Each instance starts with its own empty child list; no
 * comment text is stored.
 */
let Comment = function () {
  this.childNodes = [];
};

// Inherit the tree-manipulation behaviour from Node and label the type.
Comment.prototype = Object.create(Node.prototype);
Comment.prototype.nodeName = "#comment";
Comment.prototype.nodeType = Node.COMMENT_NODE;
michael@0 356
/**
 * Text node.  Each instance starts with its own empty child list; the text
 * itself lives in `textContent`, which defaults to the empty string.
 */
let Text = function () {
  this.childNodes = [];
};

// Inherit the tree-manipulation behaviour from Node and label the type.
Text.prototype = Object.create(Node.prototype);
Text.prototype.nodeName = "#text";
Text.prototype.nodeType = Node.TEXT_NODE;
Text.prototype.textContent = "";
michael@0 368
/**
 * Root node of a parsed document.  Starts with an empty child list and an
 * empty `styleSheets` array.
 */
let Document = function () {
  this.styleSheets = [];
  this.childNodes = [];
};

Document.prototype = {
  __proto__: Node.prototype,

  nodeName: "#document",
  nodeType: Node.DOCUMENT_NODE,
  title: "",

  getElementsByTagName: getElementsByTagName,

  /**
   * Depth-first search for the first node whose `id` equals `id`.
   *
   * @param id  the id to look for
   * @returns the matching node, or null when there is none
   */
  getElementById: function (id) {
    // A missing or empty id identifies nothing.  Without this guard, ""
    // would match the first element that has no id attribute (Element's id
    // getter falls back to ""), and undefined would match the document
    // itself, which has no id property at all.
    if (id === undefined || id === null || id === "")
      return null;

    function getElem(node) {
      let length = node.childNodes.length;
      if (node.id === id)
        return node;
      for (let i = 0; i < length; i++) {
        let el = getElem(node.childNodes[i]);
        if (el)
          return el;
      }
      return null;
    }
    return getElem(this);
  },

  /**
   * Creates a new, detached Element with the given tag name.
   */
  createElement: function (tag) {
    let node = new Element(tag);
    return node;
  }
};
michael@0 403
/**
 * Returns the index of the attribute called `name`, searching from the end
 * of the list so that the last matching entry wins, or -1 when there is no
 * such attribute.
 */
function indexOfAttribute(attributes, name) {
  let index = attributes.length - 1;
  while (index >= 0 && attributes[index].name !== name)
    index--;
  return index;
}

/**
 * Element node.
 *
 * @param tag  tag name; stored lower-cased as `localName` and upper-cased as
 *             `tagName`
 */
let Element = function (tag) {
  this.attributes = [];
  this.childNodes = [];
  this.localName = tag.toLowerCase();
  this.tagName = tag.toUpperCase();
  this.style = new Style(this);
};

Element.prototype = {
  __proto__: Node.prototype,

  nodeType: Node.ELEMENT_NODE,

  getElementsByTagName: getElementsByTagName,

  // The following accessors reflect the attribute of the same meaning; a
  // missing attribute reads as the empty string.

  get className() {
    return this.getAttribute("class") || "";
  },

  set className(str) {
    this.setAttribute("class", str);
  },

  get id() {
    return this.getAttribute("id") || "";
  },

  set id(str) {
    this.setAttribute("id", str);
  },

  get href() {
    return this.getAttribute("href") || "";
  },

  set href(str) {
    this.setAttribute("href", str);
  },

  get src() {
    return this.getAttribute("src") || "";
  },

  set src(str) {
    this.setAttribute("src", str);
  },

  get nodeName() {
    return this.tagName;
  },

  /**
   * Serializes the children of this element to markup.  Attribute values
   * and text are emitted as stored, without escaping.
   */
  get innerHTML() {
    // Collect fragments and join once at the end; this avoids the overhead
    // from lazy string concatenation.
    // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
    let pieces = [];

    function serializeChildren(parent) {
      for (let i = 0; i < parent.childNodes.length; i++) {
        let child = parent.childNodes[i];

        // Anything without a localName is written out as its text
        if (!child.localName) {
          pieces.push(child.textContent);
          continue;
        }

        pieces.push("<" + child.localName);

        // Attribute list; switch to single quotes when the value itself
        // contains a double quote
        for (let j = 0; j < child.attributes.length; j++) {
          let attr = child.attributes[j];
          let quote = "'";
          if (attr.value.indexOf('"') === -1)
            quote = '"';
          pieces.push(" " + attr.name + '=' + quote + attr.value + quote);
        }

        if (child.localName in voidElems) {
          // void element: close it immediately
          pieces.push("/>");
          continue;
        }

        pieces.push(">");
        serializeChildren(child);
        pieces.push("</" + child.localName + ">");
      }
    }

    serializeChildren(this);
    return pieces.join("");
  },

  /**
   * Replaces the children of this element with the nodes parsed from
   * `html`.
   */
  set innerHTML(html) {
    let parsed = new JSDOMParser().parse(html);

    let previous = this.childNodes;
    for (let i = 0; i < previous.length; i++) {
      previous[i].parentNode = null;
    }

    let adopted = parsed.childNodes;
    this.childNodes = adopted;
    for (let i = 0; i < adopted.length; i++) {
      adopted[i].parentNode = this;
    }
  },

  /**
   * Replaces all children with a single Text node holding `text`.
   */
  set textContent(text) {
    // detach the existing children
    let previous = this.childNodes;
    for (let i = 0; i < previous.length; i++) {
      previous[i].parentNode = null;
    }

    let replacement = new Text();
    replacement.textContent = text;
    replacement.parentNode = this;
    this.childNodes = [ replacement ];
  },

  /**
   * The concatenated text of every Text node below this element, in
   * document order.
   */
  get textContent() {
    // Collect fragments and join once at the end; this avoids the overhead
    // from lazy string concatenation.
    // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
    let fragments = [];

    function collect(parent) {
      let kids = parent.childNodes;
      for (let i = 0; i < kids.length; i++) {
        if (kids[i].nodeType === 3) {
          fragments.push(kids[i].textContent);
        } else {
          collect(kids[i]);
        }
      }
    }

    collect(this);
    return fragments.join("");
  },

  /**
   * @returns the value of the attribute called `name`, or undefined when
   *          the element has no such attribute
   */
  getAttribute: function (name) {
    let index = indexOfAttribute(this.attributes, name);
    if (index === -1)
      return undefined;
    return this.attributes[index].value;
  },

  /**
   * Sets the attribute called `name` to `value`, adding it when absent.
   */
  setAttribute: function (name, value) {
    let index = indexOfAttribute(this.attributes, name);
    if (index === -1) {
      this.attributes.push(new Attribute(name, value));
    } else {
      this.attributes[index].value = value;
    }
  },

  /**
   * Removes the attribute called `name`; does nothing when it is absent.
   */
  removeAttribute: function (name) {
    let index = indexOfAttribute(this.attributes, name);
    if (index !== -1)
      this.attributes.splice(index, 1);
  }
};
michael@0 566
/**
 * Style object attached to an element.
 *
 * @param node  the owning node; must provide getAttribute() and
 *              setAttribute()
 */
let Style = function (node) {
  this.node = node;
};

// getStyle() and setStyle() use the style attribute string directly. This
// won't be very efficient if there are a lot of style manipulations, but
// it's the easiest way to make sure the style attribute string and the JS
// style property stay in sync. Readability.js doesn't do many style
// manipulations, so this should be okay.
//
// Declarations are separated on ";" and split at their first ":"; a ";"
// inside a quoted string or url() is not recognized as part of a value.
Style.prototype = {
  /**
   * @param styleName  CSS property name, e.g. "background-color"
   * @returns the value of the first declaration for `styleName`, or
   *          undefined when the property (or the style attribute) is absent
   */
  getStyle: function (styleName) {
    // getAttribute() returns the attribute's string value, not an Attribute
    let attr = this.node.getAttribute("style");
    if (!attr)
      return undefined;

    let styles = attr.split(";");
    for (let i = 0; i < styles.length; i++) {
      // Split at the first colon only, so that values which themselves
      // contain ":" (such as URLs) are returned whole
      let colon = styles[i].indexOf(":");
      if (colon === -1)
        continue;
      if (styles[i].substring(0, colon).trim() === styleName)
        return styles[i].substring(colon + 1).trim();
    }

    return undefined;
  },

  /**
   * Sets `styleName` to `styleValue` in the style attribute.  Existing
   * declarations for the property are dropped and the new one is appended
   * at the end; the attribute is rewritten as "name: value;" declarations
   * separated by single spaces.
   */
  setStyle: function (styleName, styleValue) {
    let attr = this.node.getAttribute("style") || "";
    let kept = [];

    let styles = attr.split(";");
    for (let i = 0; i < styles.length; i++) {
      let style = styles[i].trim();
      if (!style)
        continue;

      let colon = style.indexOf(":");
      let name = (colon === -1 ? style : style.substring(0, colon)).trim();
      // Re-terminate every surviving declaration so that a source string
      // without a trailing ";" cannot run into the appended declaration
      if (name !== styleName)
        kept.push(style + ";");
    }

    kept.push(styleName + ": " + styleValue + ";");
    this.node.setAttribute("style", kept.join(" "));
  }
};
michael@0 612
// For each item in styleMap, define a getter and setter on the style
// property, so that e.g. `style.backgroundColor` reads and writes the
// "background-color" declaration.  Object.defineProperty is used in place of
// the deprecated __defineGetter__/__defineSetter__; the accessors stay
// enumerable and configurable as before.
Object.keys(styleMap).forEach(function (jsName) {
  let cssName = styleMap[jsName];
  Object.defineProperty(Style.prototype, jsName, {
    configurable: true,
    enumerable: true,
    get: function () {
      return this.getStyle(cssName);
    },
    set: function (value) {
      this.setStyle(cssName, value);
    }
  });
});
michael@0 625
michael@0 626 let JSDOMParser = function () {
michael@0 627 this.currentChar = 0;
michael@0 628 };
michael@0 629
michael@0 630 JSDOMParser.prototype = {
michael@0 631 /**
michael@0 632 * Look at the next character without advancing the index.
michael@0 633 */
michael@0 634 peekNext: function () {
michael@0 635 return this.html[this.currentChar];
michael@0 636 },
michael@0 637
michael@0 638 /**
michael@0 639 * Get the next character and advance the index.
michael@0 640 */
michael@0 641 nextChar: function () {
michael@0 642 return this.html[this.currentChar++];
michael@0 643 },
michael@0 644
michael@0 645 /**
michael@0 646 * Called after a quote character is read. This finds the next quote
michael@0 647 * character and returns the text string in between.
michael@0 648 */
michael@0 649 readString: function (quote) {
michael@0 650 let str;
michael@0 651 let n = this.html.indexOf(quote, this.currentChar);
michael@0 652 if (n === -1) {
michael@0 653 this.currentChar = this.html.length;
michael@0 654 str = null;
michael@0 655 } else {
michael@0 656 str = this.html.substring(this.currentChar, n);
michael@0 657 this.currentChar = n + 1;
michael@0 658 }
michael@0 659
michael@0 660 return str;
michael@0 661 },
michael@0 662
michael@0 663 /**
michael@0 664 * Called when parsing a node. This finds the next name/value attribute
michael@0 665 * pair and adds the result to the attributes list.
michael@0 666 */
michael@0 667 readAttribute: function (node) {
michael@0 668 let name = "";
michael@0 669
michael@0 670 let n = this.html.indexOf("=", this.currentChar);
michael@0 671 if (n === -1) {
michael@0 672 this.currentChar = this.html.length;
michael@0 673 } else {
michael@0 674 // Read until a '=' character is hit; this will be the attribute key
michael@0 675 name = this.html.substring(this.currentChar, n);
michael@0 676 this.currentChar = n + 1;
michael@0 677 }
michael@0 678
michael@0 679 if (!name)
michael@0 680 return;
michael@0 681
michael@0 682 // After a '=', we should see a '"' for the attribute value
michael@0 683 let c = this.nextChar();
michael@0 684 if (c !== '"' && c !== "'") {
michael@0 685 error("expecting '\"'");
michael@0 686 return;
michael@0 687 }
michael@0 688
michael@0 689 // Read the attribute value (and consume the matching quote)
michael@0 690 let value = this.readString(c);
michael@0 691
michael@0 692 if (!value)
michael@0 693 return;
michael@0 694
michael@0 695 node.attributes.push(new Attribute(name, value));
michael@0 696
michael@0 697 return;
michael@0 698 },
michael@0 699
michael@0 700 /**
michael@0 701 * Parses and returns an Element node. This is called after a '<' has been
michael@0 702 * read.
michael@0 703 *
michael@0 704 * @returns an array; the first index of the array is the parsed node;
michael@0 705 * the second index is a boolean indicating whether this is a void
michael@0 706 * Element
michael@0 707 */
michael@0 708 makeElementNode: function () {
michael@0 709 let c = this.nextChar();
michael@0 710
michael@0 711 // Read the Element tag name
michael@0 712 let tag = "";
michael@0 713 while (c !== " " && c !== ">" && c !== "/") {
michael@0 714 if (c === undefined)
michael@0 715 return null;
michael@0 716 tag += c;
michael@0 717 c = this.nextChar();
michael@0 718 }
michael@0 719
michael@0 720 if (!tag)
michael@0 721 return null;
michael@0 722
michael@0 723 let node = new Element(tag);
michael@0 724
michael@0 725 // Read Element attributes
michael@0 726 while (c !== "/" && c !== ">") {
michael@0 727 if (c === undefined)
michael@0 728 return null;
michael@0 729 while (this.match(" "));
michael@0 730 c = this.nextChar();
michael@0 731 if (c !== "/" && c !== ">") {
michael@0 732 --this.currentChar;
michael@0 733 this.readAttribute(node);
michael@0 734 }
michael@0 735 }
michael@0 736
michael@0 737 // If this is a self-closing tag, read '/>'
michael@0 738 let closed = tag in voidElems;
michael@0 739 if (c === "/") {
michael@0 740 closed = true;
michael@0 741 c = this.nextChar();
michael@0 742 if (c !== ">") {
michael@0 743 error("expected '>'");
michael@0 744 return null;
michael@0 745 }
michael@0 746 }
michael@0 747
michael@0 748 return [node, closed];
michael@0 749 },
michael@0 750
michael@0 751 /**
michael@0 752 * If the current input matches this string, advance the input index;
michael@0 753 * otherwise, do nothing.
michael@0 754 *
michael@0 755 * @returns whether input matched string
michael@0 756 */
michael@0 757 match: function (str) {
michael@0 758 let strlen = str.length;
michael@0 759 if (this.html.substr(this.currentChar, strlen) === str) {
michael@0 760 this.currentChar += strlen;
michael@0 761 return true;
michael@0 762 }
michael@0 763 return false;
michael@0 764 },
michael@0 765
michael@0 766 /**
michael@0 767 * Searches the input until a string is found and discards all input up to
michael@0 768 * and including the matched string.
michael@0 769 */
michael@0 770 discardTo: function (str) {
michael@0 771 let index = this.html.indexOf(str, this.currentChar) + str.length;
michael@0 772 if (index === -1)
michael@0 773 this.currentChar = this.html.length;
michael@0 774 this.currentChar = index;
michael@0 775 },
michael@0 776
michael@0 777 /**
michael@0 778 * Reads child nodes for the given node.
michael@0 779 */
michael@0 780 readChildren: function (node) {
michael@0 781 let child;
michael@0 782 while ((child = this.readNode())) {
michael@0 783 // Don't keep Comment nodes
michael@0 784 if (child.nodeType !== 8) {
michael@0 785 node.childNodes.push(child);
michael@0 786 child.parentNode = node;
michael@0 787 }
michael@0 788 }
michael@0 789 },
michael@0 790
michael@0 791 /**
michael@0 792 * Reads the next child node from the input. If we're reading a closing
michael@0 793 * tag, or if we've reached the end of input, return null.
michael@0 794 *
michael@0 795 * @returns the node
michael@0 796 */
michael@0 797 readNode: function () {
michael@0 798 let c = this.nextChar();
michael@0 799
michael@0 800 if (c === undefined)
michael@0 801 return null;
michael@0 802
michael@0 803 // Read any text as Text node
michael@0 804 if (c !== "<") {
michael@0 805 --this.currentChar;
michael@0 806 let node = new Text();
michael@0 807 let n = this.html.indexOf("<", this.currentChar);
michael@0 808 if (n === -1) {
michael@0 809 node.textContent = this.html.substring(this.currentChar, this.html.length);
michael@0 810 this.currentChar = this.html.length;
michael@0 811 } else {
michael@0 812 node.textContent = this.html.substring(this.currentChar, n);
michael@0 813 this.currentChar = n;
michael@0 814 }
michael@0 815 return node;
michael@0 816 }
michael@0 817
michael@0 818 c = this.peekNext();
michael@0 819
michael@0 820 // Read Comment node. Normally, Comment nodes know their inner
michael@0 821 // textContent, but we don't really care about Comment nodes (we throw
michael@0 822 // them away in readChildren()). So just returning an empty Comment node
michael@0 823 // here is sufficient.
michael@0 824 if (c === "!" || c === "?") {
michael@0 825 this.currentChar++;
michael@0 826 if (this.match("--")) {
michael@0 827 this.discardTo("-->");
michael@0 828 } else {
michael@0 829 let c = this.nextChar();
michael@0 830 while (c !== ">") {
michael@0 831 if (c === undefined)
michael@0 832 return null;
michael@0 833 if (c === '"' || c === "'")
michael@0 834 this.readString(c);
michael@0 835 c = this.nextChar();
michael@0 836 }
michael@0 837 }
michael@0 838 return new Comment();
michael@0 839 }
michael@0 840
michael@0 841 // If we're reading a closing tag, return null. This means we've reached
michael@0 842 // the end of this set of child nodes.
michael@0 843 if (c === "/") {
michael@0 844 --this.currentChar;
michael@0 845 return null;
michael@0 846 }
michael@0 847
michael@0 848 // Otherwise, we're looking at an Element node
michael@0 849 let result = this.makeElementNode();
michael@0 850 if (result === null)
michael@0 851 return null;
michael@0 852
michael@0 853 let [node, closed] = result;
michael@0 854 let localName = node.localName;
michael@0 855
michael@0 856 // If this isn't a void Element, read its child nodes
michael@0 857 if (!closed) {
michael@0 858 this.readChildren(node);
michael@0 859 let closingTag = "</" + localName + ">";
michael@0 860 if (!this.match(closingTag)) {
michael@0 861 error("expected '" + closingTag + "'");
michael@0 862 return null;
michael@0 863 }
michael@0 864 }
michael@0 865
michael@0 866 if (localName === "title") {
michael@0 867 this.doc.title = node.textContent.trim();
michael@0 868 } else if (localName === "head") {
michael@0 869 this.doc.head = node;
michael@0 870 } else if (localName === "body") {
michael@0 871 this.doc.body = node;
michael@0 872 } else if (localName === "html") {
michael@0 873 this.doc.documentElement = node;
michael@0 874 }
michael@0 875
michael@0 876 return node;
michael@0 877 },
michael@0 878
michael@0 879 /**
michael@0 880 * Parses an HTML string and returns a JS implementation of the Document.
michael@0 881 */
michael@0 882 parse: function (html) {
michael@0 883 this.html = html;
michael@0 884 let doc = this.doc = new Document();
michael@0 885 this.readChildren(doc);
michael@0 886
michael@0 887 // If this is an HTML document, remove root-level children except for the
michael@0 888 // <html> node
michael@0 889 if (doc.documentElement) {
michael@0 890 for (let i = doc.childNodes.length; --i >= 0;) {
michael@0 891 let child = doc.childNodes[i];
michael@0 892 if (child !== doc.documentElement) {
michael@0 893 doc.removeChild(child);
michael@0 894 }
michael@0 895 }
michael@0 896 }
michael@0 897
michael@0 898 return doc;
michael@0 899 }
michael@0 900 };
michael@0 901
// Publish the standard DOM types, followed by the parser itself, on the
// global scope object handed to this module.
let exported = {
  Node: Node,
  Comment: Comment,
  Document: Document,
  Element: Element,
  Text: Text,
  JSDOMParser: JSDOMParser
};
Object.keys(exported).forEach(function (exportName) {
  global[exportName] = exported[exportName];
});
michael@0 911
michael@0 912 }) (this);

mercurial