|
1 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
2 * License, v. 2.0. If a copy of the MPL was not distributed with this file, |
|
3 * You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
4 |
|
/**
 * This is a relatively lightweight DOMParser that is safe to use in a web
 * worker. This is far from a complete DOM implementation; however, it should
 * contain the minimal set of functionality necessary for Readability.js.
 *
 * Aside from not implementing the full DOM API, there are other quirks to be
 * aware of when using the JSDOMParser:
 *
 *   1) Properly formed HTML/XML must be used. This means you should be extra
 *      careful when using this parser on anything received directly from an
 *      XMLHttpRequest. Providing a serialized string from an XMLSerializer,
 *      however, should be safe (since the browser's XMLSerializer should
 *      generate valid HTML/XML). Therefore, if parsing a document from an XHR,
 *      the recommended approach is to do the XHR in the main thread, use
 *      XMLSerializer.serializeToString() on the responseXML, and pass the
 *      resulting string to the worker.
 *
 *   2) Live NodeLists are not supported. DOM methods and properties such as
 *      getElementsByTagName() and childNodes return standard arrays. If you
 *      want these lists to be updated when nodes are removed or added to the
 *      document, you must take care to manually update them yourself.
 */
|
27 (function (global) { |
|
28 |
|
/**
 * Reports a parser error. The message is prefixed with "JSDOMParser error: ".
 *
 * dump() is used when the host provides it; otherwise the message goes to
 * console.log() so that reporting an error never throws a ReferenceError in
 * environments that have no dump(). If neither exists the message is dropped.
 *
 * @param m the error description to report
 */
function error(m) {
  let message = "JSDOMParser error: " + m;
  if (typeof dump === "function") {
    dump(message);
  } else if (typeof console !== "undefined" && typeof console.log === "function") {
    console.log(message);
  }
}
|
32 |
|
// Maps the camelCase names used for style properties in JS to the hyphenated
// CSS property names written into the style attribute. The table is built
// from the CSS names; "float" is the one entry whose JS name ("cssFloat") is
// not simply the camelCased CSS name.
let styleMap = (function () {
  let cssNames = [
    "alignment-baseline",
    "background",
    "background-attachment",
    "background-clip",
    "background-color",
    "background-image",
    "background-origin",
    "background-position",
    "background-position-x",
    "background-position-y",
    "background-repeat",
    "background-repeat-x",
    "background-repeat-y",
    "background-size",
    "baseline-shift",
    "border",
    "border-bottom",
    "border-bottom-color",
    "border-bottom-left-radius",
    "border-bottom-right-radius",
    "border-bottom-style",
    "border-bottom-width",
    "border-collapse",
    "border-color",
    "border-image",
    "border-image-outset",
    "border-image-repeat",
    "border-image-slice",
    "border-image-source",
    "border-image-width",
    "border-left",
    "border-left-color",
    "border-left-style",
    "border-left-width",
    "border-radius",
    "border-right",
    "border-right-color",
    "border-right-style",
    "border-right-width",
    "border-spacing",
    "border-style",
    "border-top",
    "border-top-color",
    "border-top-left-radius",
    "border-top-right-radius",
    "border-top-style",
    "border-top-width",
    "border-width",
    "bottom",
    "box-shadow",
    "box-sizing",
    "caption-side",
    "clear",
    "clip",
    "clip-path",
    "clip-rule",
    "color",
    "color-interpolation",
    "color-interpolation-filters",
    "color-profile",
    "color-rendering",
    "content",
    "counter-increment",
    "counter-reset",
    "cursor",
    "direction",
    "display",
    "dominant-baseline",
    "empty-cells",
    "enable-background",
    "fill",
    "fill-opacity",
    "fill-rule",
    "filter",
    "float",
    "flood-color",
    "flood-opacity",
    "font",
    "font-family",
    "font-size",
    "font-stretch",
    "font-style",
    "font-variant",
    "font-weight",
    "glyph-orientation-horizontal",
    "glyph-orientation-vertical",
    "height",
    "image-rendering",
    "kerning",
    "left",
    "letter-spacing",
    "lighting-color",
    "line-height",
    "list-style",
    "list-style-image",
    "list-style-position",
    "list-style-type",
    "margin",
    "margin-bottom",
    "margin-left",
    "margin-right",
    "margin-top",
    "marker",
    "marker-end",
    "marker-mid",
    "marker-start",
    "mask",
    "max-height",
    "max-width",
    "min-height",
    "min-width",
    "opacity",
    "orphans",
    "outline",
    "outline-color",
    "outline-offset",
    "outline-style",
    "outline-width",
    "overflow",
    "overflow-x",
    "overflow-y",
    "padding",
    "padding-bottom",
    "padding-left",
    "padding-right",
    "padding-top",
    "page",
    "page-break-after",
    "page-break-before",
    "page-break-inside",
    "pointer-events",
    "position",
    "quotes",
    "resize",
    "right",
    "shape-rendering",
    "size",
    "speak",
    "src",
    "stop-color",
    "stop-opacity",
    "stroke",
    "stroke-dasharray",
    "stroke-dashoffset",
    "stroke-linecap",
    "stroke-linejoin",
    "stroke-miterlimit",
    "stroke-opacity",
    "stroke-width",
    "table-layout",
    "text-align",
    "text-anchor",
    "text-decoration",
    "text-indent",
    "text-line-through",
    "text-line-through-color",
    "text-line-through-mode",
    "text-line-through-style",
    "text-line-through-width",
    "text-overflow",
    "text-overline",
    "text-overline-color",
    "text-overline-mode",
    "text-overline-style",
    "text-overline-width",
    "text-rendering",
    "text-shadow",
    "text-transform",
    "text-underline",
    "text-underline-color",
    "text-underline-mode",
    "text-underline-style",
    "text-underline-width",
    "top",
    "unicode-bidi",
    "unicode-range",
    "vector-effect",
    "vertical-align",
    "visibility",
    "white-space",
    "widows",
    "width",
    "word-break",
    "word-spacing",
    "word-wrap",
    "writing-mode",
    "z-index",
    "zoom"
  ];

  let table = {};
  cssNames.forEach(function (cssName) {
    let jsName;
    if (cssName === "float") {
      jsName = "cssFloat";
    } else {
      // "border-top-width" -> "borderTopWidth"
      jsName = cssName.replace(/-([a-z])/g, function (whole, letter) {
        return letter.toUpperCase();
      });
    }
    table[jsName] = cssName;
  });
  return table;
})();
|
224 |
|
// Elements that can be self-closing (void elements: they never have children
// or a closing tag). The table is used by the parser to decide whether to
// read child nodes and by the innerHTML serializer to decide whether to emit
// "/>".
//
// The table has a null prototype so that `name in voidElems` only matches the
// names listed here; with a plain object, names inherited from
// Object.prototype (e.g. "constructor", "toString") would also match.
let voidElems = Object.assign(Object.create(null), {
  "area": true,
  "base": true,
  "br": true,
  "col": true,
  "command": true,
  "embed": true,
  "hr": true,
  "img": true,
  "input": true,
  "keygen": true,
  "link": true,
  "meta": true,
  "param": true,
  "source": true,
  "track": true,
  "wbr": true,
});
|
241 |
|
// Numeric nodeType codes keyed by constant name. The names are listed in
// code order, so each name's code is its position in the list plus one.
// See http://www.w3schools.com/dom/dom_nodetype.asp
let nodeTypes = [
  "ELEMENT_NODE",
  "ATTRIBUTE_NODE",
  "TEXT_NODE",
  "CDATA_SECTION_NODE",
  "ENTITY_REFERENCE_NODE",
  "ENTITY_NODE",
  "PROCESSING_INSTRUCTION_NODE",
  "COMMENT_NODE",
  "DOCUMENT_NODE",
  "DOCUMENT_TYPE_NODE",
  "DOCUMENT_FRAGMENT_NODE",
  "NOTATION_NODE"
].reduce(function (table, typeName, position) {
  table[typeName] = position + 1;
  return table;
}, {});
|
257 |
|
/**
 * Collects the element descendants of `this` whose tagName equals the
 * upper-cased `tag`, or every element descendant when `tag` is "*". Shared
 * as a method by Document and Element.
 *
 * @param tag the tag name to look for (compared upper-cased), or "*"
 * @returns a plain array (not a live list) of matches in document order;
 *          the node the method is called on is never included
 */
function getElementsByTagName(tag) {
  let wanted = tag.toUpperCase();
  let matchAll = (wanted === "*");
  let found = [];

  // Explicit stack instead of recursion. Children are pushed last-to-first so
  // they are popped first-to-last, which yields pre-order (document order).
  let pending = [];
  function pushElementChildren(parent) {
    let kids = parent.childNodes;
    for (let k = kids.length - 1; k >= 0; k--) {
      // Only element nodes (nodeType 1) are matched or descended into.
      if (kids[k].nodeType === 1)
        pending.push(kids[k]);
    }
  }

  pushElementChildren(this);
  while (pending.length > 0) {
    let current = pending.pop();
    if (matchAll || current.tagName === wanted)
      found.push(current);
    pushElementChildren(current);
  }
  return found;
}
|
276 |
|
/**
 * Base type for the node kinds defined in this file. It provides the tree
 * navigation getters and the child-list mutators; each concrete node type
 * supplies its own `childNodes` array.
 */
let Node = function () {};

Node.prototype = {
  attributes: null,
  childNodes: null,
  localName: null,
  nodeName: null,
  parentNode: null,
  textContent: null,

  /** The first entry of childNodes, or null when there is none. */
  get firstChild() {
    return this.childNodes[0] || null;
  },

  /**
   * The node following this one in its parent's childNodes, or null when this
   * node has no parent, is the last child, or is not actually present in the
   * parent's list.
   */
  get nextSibling() {
    if (!this.parentNode)
      return null;

    let siblings = this.parentNode.childNodes;
    let position = siblings.indexOf(this);
    // Without this guard a miss (-1) would wrongly yield the first child.
    if (position === -1)
      return null;
    return siblings[position + 1] || null;
  },

  /**
   * Moves `child` to the end of this node's childNodes, detaching it from its
   * current parent first.
   *
   * @returns the appended child
   */
  appendChild: function (child) {
    if (child.parentNode) {
      child.parentNode.removeChild(child);
    }

    this.childNodes.push(child);
    child.parentNode = this;
    return child;
  },

  /**
   * Removes `child` from this node's childNodes and clears its parentNode.
   *
   * @returns the removed child
   * @throws the string "removeChild: node not found" when `child` is not in
   *         childNodes
   */
  removeChild: function (child) {
    let childNodes = this.childNodes;
    let childIndex = childNodes.indexOf(child);
    if (childIndex === -1) {
      throw "removeChild: node not found";
    }

    child.parentNode = null;
    return childNodes.splice(childIndex, 1)[0];
  },

  /**
   * Puts `newNode` in the position occupied by `oldNode`, detaching `newNode`
   * from its current parent first.
   *
   * @returns the replaced node (`oldNode`)
   * @throws the string "replaceChild: node not found" when `oldNode` is not in
   *         childNodes
   */
  replaceChild: function (newNode, oldNode) {
    let childNodes = this.childNodes;
    if (childNodes.indexOf(oldNode) === -1) {
      throw "replaceChild: node not found";
    }

    // Replacing a node with itself leaves the tree unchanged.
    if (newNode === oldNode)
      return oldNode;

    if (newNode.parentNode)
      newNode.parentNode.removeChild(newNode);

    // Look up the slot only after newNode has been detached: when newNode was
    // an earlier sibling of oldNode, detaching it shifts oldNode's index.
    childNodes[childNodes.indexOf(oldNode)] = newNode;
    newNode.parentNode = this;
    oldNode.parentNode = null;
    return oldNode;
  }
};
|
336 |
|
// Expose every nodeType constant both on the Node constructor and on
// Node.prototype, so it can be read as Node.TEXT_NODE or as node.TEXT_NODE.
Object.keys(nodeTypes).forEach(function (typeName) {
  let code = nodeTypes[typeName];
  Node.prototype[typeName] = code;
  Node[typeName] = code;
});
|
340 |
|
/**
 * A single name/value attribute pair stored in an element's `attributes`
 * array.
 *
 * @param name the attribute name
 * @param value the attribute value
 */
let Attribute = function (name, value) {
  Object.assign(this, { name: name, value: value });
};
|
345 |
|
/**
 * Comment node. It carries no comment text; each instance only gets its own
 * empty childNodes array.
 */
let Comment = function () {
  this.childNodes = [];
};

// Inherit the tree helpers from Node.prototype and add the comment-specific
// nodeName and nodeType.
Comment.prototype = Object.setPrototypeOf({
  nodeName: "#comment",
  nodeType: Node.COMMENT_NODE
}, Node.prototype);
|
356 |
|
/**
 * Text node. Each instance gets its own empty childNodes array; the text is
 * stored in `textContent`, which defaults to the empty string.
 */
let Text = function () {
  this.childNodes = [];
};

// Inherit the tree helpers from Node.prototype and add the text-specific
// nodeName, nodeType and default textContent.
Text.prototype = Object.setPrototypeOf({
  nodeName: "#text",
  nodeType: Node.TEXT_NODE,
  textContent: ""
}, Node.prototype);
|
368 |
|
/**
 * Root node of a parsed document. Each instance starts with an empty
 * styleSheets array and an empty childNodes array.
 */
let Document = function () {
  this.styleSheets = [];
  this.childNodes = [];
};

Document.prototype = {
  __proto__: Node.prototype,

  nodeName: "#document",
  nodeType: Node.DOCUMENT_NODE,
  title: "",

  getElementsByTagName: getElementsByTagName,

  /**
   * Finds the first element (in document order) whose id equals `id`.
   *
   * @param id the id to look for
   * @returns the matching element, or null when there is none or when `id`
   *          is the empty string
   */
  getElementById: function (id) {
    // Elements without an id attribute report "" from their id getter, so an
    // empty id must not be allowed to match them.
    if (id === "")
      return null;

    function getElem(node) {
      // Only element nodes (nodeType 1) can match; the document and text
      // nodes have no id, which would otherwise equal an undefined `id`.
      if (node.nodeType === 1 && node.id === id)
        return node;

      let length = node.childNodes.length;
      for (let i = 0; i < length; i++) {
        let el = getElem(node.childNodes[i]);
        if (el)
          return el;
      }
      return null;
    }
    return getElem(this);
  },

  /**
   * Creates a new, unattached Element for the given tag name.
   *
   * @param tag the tag name
   * @returns the new Element
   */
  createElement: function (tag) {
    let node = new Element(tag);
    return node;
  }
};
|
403 |
|
/**
 * Element node. Stores the lower-cased tag as localName, the upper-cased tag
 * as tagName, and creates a Style object bound to the element.
 *
 * @param tag the tag name
 */
let Element = function (tag) {
  this.attributes = [];
  this.childNodes = [];
  this.localName = tag.toLowerCase();
  this.tagName = tag.toUpperCase();
  this.style = new Style(this);
};

Element.prototype = {
  __proto__: Node.prototype,

  nodeType: Node.ELEMENT_NODE,

  getElementsByTagName: getElementsByTagName,

  // className, id, href and src mirror the attribute of the corresponding
  // name ("class" for className); a missing attribute reads as "".

  get className() {
    return this.getAttribute("class") || "";
  },

  set className(str) {
    this.setAttribute("class", str);
  },

  get id() {
    return this.getAttribute("id") || "";
  },

  set id(str) {
    this.setAttribute("id", str);
  },

  get href() {
    return this.getAttribute("href") || "";
  },

  set href(str) {
    this.setAttribute("href", str);
  },

  get src() {
    return this.getAttribute("src") || "";
  },

  set src(str) {
    this.setAttribute("src", str);
  },

  get nodeName() {
    return this.tagName;
  },

  /**
   * Serializes the element's descendants (not the element itself) to markup.
   * Text is emitted exactly as stored; no entity escaping is applied to it.
   */
  get innerHTML() {
    // Using Array.join() avoids the overhead from lazy string concatenation.
    // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
    let arr = [];

    function serializeAttribute(attr) {
      // Values set through setAttribute() are not necessarily strings.
      let value = String(attr.value);
      let quote = '"';
      if (value.indexOf('"') !== -1) {
        if (value.indexOf("'") === -1) {
          // Only double quotes inside: delimit with single quotes instead.
          quote = "'";
        } else {
          // Both quote kinds inside: no delimiter is safe as-is, so escape
          // the double quotes and keep the double-quote delimiter.
          value = value.replace(/"/g, "&quot;");
        }
      }
      return " " + attr.name + "=" + quote + value + quote;
    }

    function getHTML(node) {
      for (let i = 0; i < node.childNodes.length; i++) {
        let child = node.childNodes[i];
        if (!child.localName) {
          // Not an element: emit its text.
          arr.push(child.textContent);
          continue;
        }

        arr.push("<" + child.localName);

        // serialize attribute list
        for (let j = 0; j < child.attributes.length; j++) {
          arr.push(serializeAttribute(child.attributes[j]));
        }

        // Own-property lookup so that names inherited from Object.prototype
        // are never mistaken for void elements.
        if (Object.prototype.hasOwnProperty.call(voidElems, child.localName)) {
          // if this is a self-closing element, end it here
          arr.push("/>");
        } else {
          // otherwise, add its children
          arr.push(">");
          getHTML(child);
          arr.push("</" + child.localName + ">");
        }
      }
    }

    getHTML(this);
    return arr.join("");
  },

  /**
   * Replaces the element's children with the nodes parsed from `html` by a
   * fresh JSDOMParser.
   */
  set innerHTML(html) {
    let parser = new JSDOMParser();
    let node = parser.parse(html);
    // Detach the current children.
    for (let i = this.childNodes.length; --i >= 0;) {
      this.childNodes[i].parentNode = null;
    }
    // Adopt the parsed document's child list and re-parent its nodes.
    this.childNodes = node.childNodes;
    for (let i = this.childNodes.length; --i >= 0;) {
      this.childNodes[i].parentNode = this;
    }
  },

  /** Replaces the element's children with a single Text node holding `text`. */
  set textContent(text) {
    // clear parentNodes for existing children
    for (let i = this.childNodes.length; --i >= 0;) {
      this.childNodes[i].parentNode = null;
    }

    let node = new Text();
    this.childNodes = [ node ];
    node.textContent = text;
    node.parentNode = this;
  },

  /** Concatenated textContent of all descendant text nodes, in document order. */
  get textContent() {
    // Using Array.join() avoids the overhead from lazy string concatenation.
    // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
    let text = [];

    function getText(node) {
      let nodes = node.childNodes;
      for (let i = 0; i < nodes.length; i++) {
        let child = nodes[i];
        if (child.nodeType === 3) {
          text.push(child.textContent);
        } else {
          getText(child);
        }
      }
    }

    getText(this);
    return text.join("");
  },

  /**
   * @returns the value of the attribute called `name`, or undefined when the
   *          element has no such attribute
   */
  getAttribute: function (name) {
    for (let i = this.attributes.length; --i >= 0;) {
      let attr = this.attributes[i];
      if (attr.name === name)
        return attr.value;
    }
    return undefined;
  },

  /** Sets attribute `name` to `value`, adding the attribute if necessary. */
  setAttribute: function (name, value) {
    for (let i = this.attributes.length; --i >= 0;) {
      let attr = this.attributes[i];
      if (attr.name === name) {
        attr.value = value;
        return;
      }
    }
    this.attributes.push(new Attribute(name, value));
  },

  /** Removes attribute `name`; does nothing when it is not present. */
  removeAttribute: function (name) {
    for (let i = this.attributes.length; --i >= 0;) {
      let attr = this.attributes[i];
      if (attr.name === name) {
        this.attributes.splice(i, 1);
        break;
      }
    }
  }
};
|
566 |
|
/**
 * The `style` object of an element.
 *
 * @param node the element whose style attribute this object reads and writes
 */
let Style = function (node) {
  this.node = node;
};

// getStyle() and setStyle() use the style attribute string directly. This
// won't be very efficient if there are a lot of style manipulations, but
// it's the easiest way to make sure the style attribute string and the JS
// style property stay in sync. Readability.js doesn't do many style
// manipulations, so this should be okay.
//
// Declarations are split on ";" and the name is separated from the value at
// the first ":". A value that itself contains ";" is therefore not supported.
Style.prototype = {
  /**
   * @param styleName the CSS property name (e.g. "background-color")
   * @returns the trimmed value of the first declaration for `styleName` in
   *          the style attribute, or undefined when there is none
   */
  getStyle: function (styleName) {
    // getAttribute() returns the attribute's value (a string), or undefined.
    let attr = this.node.getAttribute("style");
    if (!attr)
      return undefined;

    let styles = String(attr).split(";");
    for (let i = 0; i < styles.length; i++) {
      let colon = styles[i].indexOf(":");
      // Skip fragments that are not "name: value" (e.g. the empty fragment
      // after a trailing ";").
      if (colon === -1)
        continue;
      if (styles[i].slice(0, colon).trim() === styleName) {
        // Everything after the first ":" is the value, so values that contain
        // ":" themselves (such as URLs) are returned whole.
        return styles[i].slice(colon + 1).trim();
      }
    }

    return undefined;
  },

  /**
   * Sets `styleName` to `styleValue` in the style attribute. Existing
   * declarations for `styleName` are removed, the other declarations are
   * kept in order, and the new declaration is appended at the end.
   *
   * @param styleName the CSS property name (e.g. "background-color")
   * @param styleValue the value to store
   */
  setStyle: function (styleName, styleValue) {
    let attr = this.node.getAttribute("style");
    let fragments = (attr ? String(attr).split(";") : []);

    let kept = [];
    for (let i = 0; i < fragments.length; i++) {
      let text = fragments[i].trim();
      if (!text)
        continue;
      let colon = text.indexOf(":");
      let name = (colon === -1 ? "" : text.slice(0, colon).trim());
      if (name !== styleName)
        kept.push(text + ";");
    }

    kept.push(styleName + ": " + styleValue + ";");
    this.node.setAttribute("style", kept.join(" "));
  }
};
|
612 |
|
// For each item in styleMap, define a getter and setter on the style
// property. Reading style.<jsName> returns getStyle(<cssName>) and assigning
// to it calls setStyle(<cssName>, value).
//
// Object.defineProperty() replaces the deprecated __defineGetter__() and
// __defineSetter__(); the accessors stay enumerable and configurable, as
// those legacy methods made them.
Object.keys(styleMap).forEach(function (jsName) {
  let cssName = styleMap[jsName];
  Object.defineProperty(Style.prototype, jsName, {
    get: function () {
      return this.getStyle(cssName);
    },
    set: function (value) {
      this.setStyle(cssName, value);
    },
    enumerable: true,
    configurable: true
  });
});
|
625 |
|
/**
 * Parser that turns a markup string into the node types defined in this
 * file. `currentChar` is the index of the next unread character of
 * `this.html`.
 */
let JSDOMParser = function () {
  this.currentChar = 0;
};

JSDOMParser.prototype = {
  /**
   * Look at the next character without advancing the index.
   *
   * @returns the character, or undefined at the end of input
   */
  peekNext: function () {
    return this.html[this.currentChar];
  },

  /**
   * Get the next character and advance the index.
   *
   * @returns the character, or undefined at the end of input
   */
  nextChar: function () {
    return this.html[this.currentChar++];
  },

  /**
   * Called after a quote character is read. This finds the next quote
   * character and returns the text string in between.
   *
   * @param quote the quote character that ends the string
   * @returns the text before the closing quote (possibly ""), or null when
   *          no closing quote exists; in that case the rest of the input is
   *          consumed
   */
  readString: function (quote) {
    let end = this.html.indexOf(quote, this.currentChar);
    if (end === -1) {
      this.currentChar = this.html.length;
      return null;
    }

    let str = this.html.substring(this.currentChar, end);
    this.currentChar = end + 1;
    return str;
  },

  /**
   * Called when parsing a node. This finds the next name/value attribute
   * pair and adds the result to the attributes list.
   *
   * @param node the element whose `attributes` array receives the pair
   */
  readAttribute: function (node) {
    let name = "";

    let n = this.html.indexOf("=", this.currentChar);
    if (n === -1) {
      this.currentChar = this.html.length;
    } else {
      // Read until a '=' character is hit; this will be the attribute key
      name = this.html.substring(this.currentChar, n);
      this.currentChar = n + 1;
    }

    if (!name)
      return;

    // After a '=', we should see a '"' for the attribute value
    let c = this.nextChar();
    if (c !== '"' && c !== "'") {
      error("expecting '\"'");
      return;
    }

    // Read the attribute value (and consume the matching quote)
    let value = this.readString(c);

    // null means the closing quote is missing. An empty string is a valid
    // value (e.g. alt="") and must be kept.
    if (value === null)
      return;

    node.attributes.push(new Attribute(name, value));
  },

  /**
   * Parses and returns an Element node. This is called after a '<' has been
   * read.
   *
   * @returns an array; the first index of the array is the parsed node;
   *          the second index is a boolean indicating whether this is a void
   *          Element. Returns null when the tag is empty, the input ends
   *          inside the tag, or a '/' is not followed by '>'.
   */
  makeElementNode: function () {
    let c = this.nextChar();

    // Read the Element tag name
    let tag = "";
    while (c !== " " && c !== ">" && c !== "/") {
      if (c === undefined)
        return null;
      tag += c;
      c = this.nextChar();
    }

    if (!tag)
      return null;

    let node = new Element(tag);

    // Read Element attributes
    while (c !== "/" && c !== ">") {
      if (c === undefined)
        return null;
      while (this.match(" "));
      c = this.nextChar();
      if (c !== "/" && c !== ">") {
        // Not the end of the tag: step back so readAttribute() sees the
        // first character of the attribute name.
        --this.currentChar;
        this.readAttribute(node);
      }
    }

    // Void elements have no children even without a trailing '/'. Look the
    // name up lower-cased (localName) and as an own property, so that "<BR>"
    // is recognized and names inherited from Object.prototype are not.
    let closed = Object.prototype.hasOwnProperty.call(voidElems, node.localName);

    // If this is a self-closing tag, read '/>'
    if (c === "/") {
      closed = true;
      c = this.nextChar();
      if (c !== ">") {
        error("expected '>'");
        return null;
      }
    }

    return [node, closed];
  },

  /**
   * If the current input matches this string, advance the input index;
   * otherwise, do nothing.
   *
   * @returns whether input matched string
   */
  match: function (str) {
    let strlen = str.length;
    if (this.html.substr(this.currentChar, strlen) === str) {
      this.currentChar += strlen;
      return true;
    }
    return false;
  },

  /**
   * Searches the input until a string is found and discards all input up to
   * and including the matched string. When the string does not occur, the
   * rest of the input is discarded.
   */
  discardTo: function (str) {
    // Test the search result itself for -1; adding str.length first would
    // hide the miss and move the cursor back towards the start of the input.
    let found = this.html.indexOf(str, this.currentChar);
    if (found === -1) {
      this.currentChar = this.html.length;
    } else {
      this.currentChar = found + str.length;
    }
  },

  /**
   * Reads child nodes for the given node.
   */
  readChildren: function (node) {
    let child;
    while ((child = this.readNode())) {
      // Don't keep Comment nodes
      if (child.nodeType !== 8) {
        node.childNodes.push(child);
        child.parentNode = node;
      }
    }
  },

  /**
   * Reads the next child node from the input. If we're reading a closing
   * tag, or if we've reached the end of input, return null.
   *
   * @returns the node
   */
  readNode: function () {
    let c = this.nextChar();

    if (c === undefined)
      return null;

    // Read any text as Text node
    if (c !== "<") {
      --this.currentChar;
      let textNode = new Text();
      let n = this.html.indexOf("<", this.currentChar);
      if (n === -1) {
        textNode.textContent = this.html.substring(this.currentChar, this.html.length);
        this.currentChar = this.html.length;
      } else {
        textNode.textContent = this.html.substring(this.currentChar, n);
        this.currentChar = n;
      }
      return textNode;
    }

    c = this.peekNext();

    // Read Comment node. Normally, Comment nodes know their inner
    // textContent, but we don't really care about Comment nodes (we throw
    // them away in readChildren()). So just returning an empty Comment node
    // here is sufficient. Any other "<!...>" or "<?...>" construct is skipped
    // the same way.
    if (c === "!" || c === "?") {
      this.currentChar++;
      if (this.match("--")) {
        this.discardTo("-->");
      } else {
        let skipped = this.nextChar();
        while (skipped !== ">") {
          if (skipped === undefined)
            return null;
          // Skip quoted strings whole so a '>' inside them is not taken as
          // the end of the construct.
          if (skipped === '"' || skipped === "'")
            this.readString(skipped);
          skipped = this.nextChar();
        }
      }
      return new Comment();
    }

    // If we're reading a closing tag, return null. This means we've reached
    // the end of this set of child nodes.
    if (c === "/") {
      // Step back onto the '<' so the caller can match the closing tag.
      --this.currentChar;
      return null;
    }

    // Otherwise, we're looking at an Element node
    let result = this.makeElementNode();
    if (result === null)
      return null;

    let [node, closed] = result;
    let localName = node.localName;

    // If this isn't a void Element, read its child nodes
    if (!closed) {
      this.readChildren(node);
      let closingTag = "</" + localName + ">";
      if (!this.match(closingTag)) {
        error("expected '" + closingTag + "'");
        return null;
      }
    }

    // Record the well-known elements on the document being built.
    if (localName === "title") {
      this.doc.title = node.textContent.trim();
    } else if (localName === "head") {
      this.doc.head = node;
    } else if (localName === "body") {
      this.doc.body = node;
    } else if (localName === "html") {
      this.doc.documentElement = node;
    }

    return node;
  },

  /**
   * Parses an HTML string and returns a JS implementation of the Document.
   *
   * @param html the markup to parse
   * @returns the resulting Document
   */
  parse: function (html) {
    this.html = html;
    // Start from the beginning so a parser instance can be reused.
    this.currentChar = 0;
    let doc = this.doc = new Document();
    this.readChildren(doc);

    // If this is an HTML document, remove root-level children except for the
    // <html> node
    if (doc.documentElement) {
      for (let i = doc.childNodes.length; --i >= 0;) {
        let child = doc.childNodes[i];
        if (child !== doc.documentElement) {
          doc.removeChild(child);
        }
      }
    }

    return doc;
  }
};
|
901 |
|
// Attach the standard DOM types and JSDOMParser to the global scope, each
// under its own name.
let domExports = {
  Node: Node,
  Comment: Comment,
  Document: Document,
  Element: Element,
  Text: Text,
  JSDOMParser: JSDOMParser
};
Object.keys(domExports).forEach(function (exportName) {
  global[exportName] = domExports[exportName];
});
|
911 |
|
912 }) (this); |