/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * This is a relatively lightweight DOMParser that is safe to use in a web
 * worker. This is far from a complete DOM implementation; however, it should
 * contain the minimal set of functionality necessary for Readability.js.
 *
 * Aside from not implementing the full DOM API, there are other quirks to be
 * aware of when using the JSDOMParser:
 *
 *   1) Properly formed HTML/XML must be used. This means you should be extra
 *      careful when using this parser on anything received directly from an
 *      XMLHttpRequest. Providing a serialized string from an XMLSerializer,
 *      however, should be safe (since the browser's XMLSerializer should
 *      generate valid HTML/XML). Therefore, if parsing a document from an XHR,
 *      the recommended approach is to do the XHR in the main thread, use
 *      XMLSerializer.serializeToString() on the responseXML, and pass the
 *      resulting string to the worker.
 *
 *   2) Live NodeLists are not supported. DOM methods and properties such as
 *      getElementsByTagName() and childNodes return standard arrays. If you
 *      want these lists to be updated when nodes are removed or added to the
 *      document, you must take care to manually update them yourself.
 */
(function (global) {

  /**
   * Reports a parser error. Uses dump() where the host provides it and falls
   * back to console.error() elsewhere, so that reporting an error never
   * itself throws a ReferenceError.
   *
   * @param m the error description
   */
  function error(m) {
    let message = "JSDOMParser error: " + m;
    if (typeof dump === "function") {
      dump(message);
    } else if (typeof console !== "undefined" && console.error) {
      console.error(message);
    }
  }

  // When a style is set in JS, map it to the corresponding CSS attribute
  let styleMap = {
    "alignmentBaseline": "alignment-baseline",
    "background": "background",
    "backgroundAttachment": "background-attachment",
    "backgroundClip": "background-clip",
    "backgroundColor": "background-color",
    "backgroundImage": "background-image",
    "backgroundOrigin": "background-origin",
    "backgroundPosition": "background-position",
    "backgroundPositionX": "background-position-x",
    "backgroundPositionY": "background-position-y",
    "backgroundRepeat": "background-repeat",
    "backgroundRepeatX": "background-repeat-x",
    "backgroundRepeatY": "background-repeat-y",
    "backgroundSize": "background-size",
    "baselineShift": "baseline-shift",
    "border": "border",
    "borderBottom": "border-bottom",
    "borderBottomColor": "border-bottom-color",
    "borderBottomLeftRadius": "border-bottom-left-radius",
    "borderBottomRightRadius": "border-bottom-right-radius",
    "borderBottomStyle": "border-bottom-style",
    "borderBottomWidth": "border-bottom-width",
    "borderCollapse": "border-collapse",
    "borderColor": "border-color",
    "borderImage": "border-image",
    "borderImageOutset": "border-image-outset",
    "borderImageRepeat": "border-image-repeat",
    "borderImageSlice": "border-image-slice",
    "borderImageSource": "border-image-source",
    "borderImageWidth": "border-image-width",
    "borderLeft": "border-left",
    "borderLeftColor": "border-left-color",
    "borderLeftStyle": "border-left-style",
    "borderLeftWidth": "border-left-width",
    "borderRadius": "border-radius",
    "borderRight": "border-right",
    "borderRightColor": "border-right-color",
    "borderRightStyle": "border-right-style",
    "borderRightWidth": "border-right-width",
    "borderSpacing": "border-spacing",
    "borderStyle": "border-style",
    "borderTop": "border-top",
    "borderTopColor": "border-top-color",
    "borderTopLeftRadius": "border-top-left-radius",
    "borderTopRightRadius": "border-top-right-radius",
    "borderTopStyle": "border-top-style",
    "borderTopWidth": "border-top-width",
    "borderWidth": "border-width",
    "bottom": "bottom",
    "boxShadow": "box-shadow",
    "boxSizing": "box-sizing",
    "captionSide": "caption-side",
    "clear": "clear",
    "clip": "clip",
    "clipPath": "clip-path",
    "clipRule": "clip-rule",
    "color": "color",
    "colorInterpolation": "color-interpolation",
    "colorInterpolationFilters": "color-interpolation-filters",
    "colorProfile": "color-profile",
    "colorRendering": "color-rendering",
    "content": "content",
    "counterIncrement": "counter-increment",
    "counterReset": "counter-reset",
    "cursor": "cursor",
    "direction": "direction",
    "display": "display",
    "dominantBaseline": "dominant-baseline",
    "emptyCells": "empty-cells",
    "enableBackground": "enable-background",
    "fill": "fill",
    "fillOpacity": "fill-opacity",
    "fillRule": "fill-rule",
    "filter": "filter",
    "cssFloat": "float",
    "floodColor": "flood-color",
    "floodOpacity": "flood-opacity",
    "font": "font",
    "fontFamily": "font-family",
    "fontSize": "font-size",
    "fontStretch": "font-stretch",
    "fontStyle": "font-style",
    "fontVariant": "font-variant",
    "fontWeight": "font-weight",
    "glyphOrientationHorizontal": "glyph-orientation-horizontal",
    "glyphOrientationVertical": "glyph-orientation-vertical",
    "height": "height",
    "imageRendering": "image-rendering",
    "kerning": "kerning",
    "left": "left",
    "letterSpacing": "letter-spacing",
    "lightingColor": "lighting-color",
    "lineHeight": "line-height",
    "listStyle": "list-style",
    "listStyleImage": "list-style-image",
    "listStylePosition": "list-style-position",
    "listStyleType": "list-style-type",
    "margin": "margin",
    "marginBottom": "margin-bottom",
    "marginLeft": "margin-left",
    "marginRight": "margin-right",
    "marginTop": "margin-top",
    "marker": "marker",
    "markerEnd": "marker-end",
    "markerMid": "marker-mid",
    "markerStart": "marker-start",
    "mask": "mask",
    "maxHeight": "max-height",
    "maxWidth": "max-width",
    "minHeight": "min-height",
    "minWidth": "min-width",
    "opacity": "opacity",
    "orphans": "orphans",
    "outline": "outline",
    "outlineColor": "outline-color",
    "outlineOffset": "outline-offset",
    "outlineStyle": "outline-style",
    "outlineWidth": "outline-width",
    "overflow": "overflow",
    "overflowX": "overflow-x",
    "overflowY": "overflow-y",
    "padding": "padding",
    "paddingBottom": "padding-bottom",
    "paddingLeft": "padding-left",
    "paddingRight": "padding-right",
    "paddingTop": "padding-top",
    "page": "page",
    "pageBreakAfter": "page-break-after",
    "pageBreakBefore": "page-break-before",
    "pageBreakInside": "page-break-inside",
    "pointerEvents": "pointer-events",
    "position": "position",
    "quotes": "quotes",
    "resize": "resize",
    "right": "right",
    "shapeRendering": "shape-rendering",
    "size": "size",
    "speak": "speak",
    "src": "src",
    "stopColor": "stop-color",
    "stopOpacity": "stop-opacity",
    "stroke": "stroke",
    "strokeDasharray": "stroke-dasharray",
    "strokeDashoffset": "stroke-dashoffset",
    "strokeLinecap": "stroke-linecap",
    "strokeLinejoin": "stroke-linejoin",
    "strokeMiterlimit": "stroke-miterlimit",
    "strokeOpacity": "stroke-opacity",
    "strokeWidth": "stroke-width",
    "tableLayout": "table-layout",
    "textAlign": "text-align",
    "textAnchor": "text-anchor",
    "textDecoration": "text-decoration",
    "textIndent": "text-indent",
    "textLineThrough": "text-line-through",
    "textLineThroughColor": "text-line-through-color",
    "textLineThroughMode": "text-line-through-mode",
    "textLineThroughStyle": "text-line-through-style",
    "textLineThroughWidth": "text-line-through-width",
    "textOverflow": "text-overflow",
    "textOverline": "text-overline",
    "textOverlineColor": "text-overline-color",
    "textOverlineMode": "text-overline-mode",
    "textOverlineStyle": "text-overline-style",
    "textOverlineWidth": "text-overline-width",
    "textRendering": "text-rendering",
    "textShadow": "text-shadow",
    "textTransform": "text-transform",
    "textUnderline": "text-underline",
    "textUnderlineColor": "text-underline-color",
    "textUnderlineMode": "text-underline-mode",
    "textUnderlineStyle": "text-underline-style",
    "textUnderlineWidth": "text-underline-width",
    "top": "top",
    "unicodeBidi": "unicode-bidi",
    "unicodeRange": "unicode-range",
    "vectorEffect": "vector-effect",
    "verticalAlign": "vertical-align",
    "visibility": "visibility",
    "whiteSpace": "white-space",
    "widows": "widows",
    "width": "width",
    "wordBreak": "word-break",
    "wordSpacing": "word-spacing",
    "wordWrap": "word-wrap",
    "writingMode": "writing-mode",
    "zIndex": "z-index",
    "zoom": "zoom",
  };

  // Elements that can be self-closing
  let voidElems = {
    "area": true,
    "base": true,
    "br": true,
    "col": true,
    "command": true,
    "embed": true,
    "hr": true,
    "img": true,
    "input": true,
    "link": true,
    "meta": true,
    "param": true,
    "source": true,
  };

  // See http://www.w3schools.com/dom/dom_nodetype.asp
  let nodeTypes = {
    ELEMENT_NODE: 1,
    ATTRIBUTE_NODE: 2,
    TEXT_NODE: 3,
    CDATA_SECTION_NODE: 4,
    ENTITY_REFERENCE_NODE: 5,
    ENTITY_NODE: 6,
    PROCESSING_INSTRUCTION_NODE: 7,
    COMMENT_NODE: 8,
    DOCUMENT_NODE: 9,
    DOCUMENT_TYPE_NODE: 10,
    DOCUMENT_FRAGMENT_NODE: 11,
    NOTATION_NODE: 12
  };

  /**
   * Shared implementation of getElementsByTagName() for Document and Element.
   * Walks the descendants of `this` depth-first and collects every Element
   * whose tagName matches.
   *
   * @param tag the tag name to look for (case-insensitive), or "*" to match
   *            every Element
   * @returns a plain (non-live) array of matching Elements in document order
   */
  function getElementsByTagName(tag) {
    tag = tag.toUpperCase();
    let elems = [];
    let allTags = (tag === "*");
    function getElems(node) {
      let length = node.childNodes.length;
      for (let i = 0; i < length; i++) {
        let child = node.childNodes[i];
        // Only Element nodes (nodeType 1) can match or contain matches
        if (child.nodeType !== 1)
          continue;
        if (allTags || (child.tagName === tag))
          elems.push(child);
        getElems(child);
      }
    }
    getElems(this);
    return elems;
  }

  /**
   * Base type for all nodes. Subtypes are expected to give each instance its
   * own childNodes array.
   */
  let Node = function () {};

  Node.prototype = {
    attributes: null,
    childNodes: null,
    localName: null,
    nodeName: null,
    parentNode: null,
    textContent: null,

    get firstChild() {
      return this.childNodes[0] || null;
    },

    get nextSibling() {
      if (this.parentNode) {
        let childNodes = this.parentNode.childNodes;
        return childNodes[childNodes.indexOf(this) + 1] || null;
      }

      return null;
    },

    /**
     * Appends a child to this node, detaching it from its current parent
     * first (if any).
     */
    appendChild: function (child) {
      if (child.parentNode) {
        child.parentNode.removeChild(child);
      }

      this.childNodes.push(child);
      child.parentNode = this;
    },

    /**
     * Removes a child from this node and returns it. Throws if the node is
     * not a child of this node.
     */
    removeChild: function (child) {
      let childNodes = this.childNodes;
      let childIndex = childNodes.indexOf(child);
      if (childIndex === -1) {
        throw "removeChild: node not found";
      } else {
        child.parentNode = null;
        return childNodes.splice(childIndex, 1)[0];
      }
    },

    /**
     * Replaces oldNode with newNode in this node's child list and returns
     * oldNode. Throws if oldNode is not a child of this node.
     */
    replaceChild: function (newNode, oldNode) {
      let childNodes = this.childNodes;
      let childIndex = childNodes.indexOf(oldNode);
      if (childIndex === -1) {
        throw "replaceChild: node not found";
      } else {
        if (newNode.parentNode)
          newNode.parentNode.removeChild(newNode);

        childNodes[childIndex] = newNode;
        newNode.parentNode = this;
        oldNode.parentNode = null;
        return oldNode;
      }
    }
  };

  // Expose the node type constants on both the constructor and its instances
  for (let i in nodeTypes) {
    Node[i] = Node.prototype[i] = nodeTypes[i];
  }

  /**
   * A single name/value attribute pair stored in Element.attributes.
   */
  let Attribute = function (name, value) {
    this.name = name;
    this.value = value;
  };

  let Comment = function () {
    this.childNodes = [];
  };

  Comment.prototype = {
    __proto__: Node.prototype,

    nodeName: "#comment",
    nodeType: Node.COMMENT_NODE
  };

  let Text = function () {
    this.childNodes = [];
  };

  Text.prototype = {
    __proto__: Node.prototype,

    nodeName: "#text",
    nodeType: Node.TEXT_NODE,
    textContent: ""
  };

  let Document = function () {
    this.styleSheets = [];
    this.childNodes = [];
  };

  Document.prototype = {
    __proto__: Node.prototype,

    nodeName: "#document",
    nodeType: Node.DOCUMENT_NODE,
    title: "",

    getElementsByTagName: getElementsByTagName,

    /**
     * Depth-first search for the first node whose `id` equals the given id.
     *
     * @returns the matching node, or null if there is none
     */
    getElementById: function (id) {
      function getElem(node) {
        let length = node.childNodes.length;
        if (node.id === id)
          return node;
        for (let i = 0; i < length; i++) {
          let el = getElem(node.childNodes[i]);
          if (el)
            return el;
        }
        return null;
      }
      return getElem(this);
    },

    createElement: function (tag) {
      let node = new Element(tag);
      return node;
    }
  };

  let Element = function (tag) {
    this.attributes = [];
    this.childNodes = [];
    this.localName = tag.toLowerCase();
    this.tagName = tag.toUpperCase();
    this.style = new Style(this);
  };

  Element.prototype = {
    __proto__: Node.prototype,

    nodeType: Node.ELEMENT_NODE,

    getElementsByTagName: getElementsByTagName,

    get className() {
      return this.getAttribute("class") || "";
    },

    set className(str) {
      this.setAttribute("class", str);
    },

    get id() {
      return this.getAttribute("id") || "";
    },

    set id(str) {
      this.setAttribute("id", str);
    },

    get href() {
      return this.getAttribute("href") || "";
    },

    set href(str) {
      this.setAttribute("href", str);
    },

    get src() {
      return this.getAttribute("src") || "";
    },

    set src(str) {
      this.setAttribute("src", str);
    },

    get nodeName() {
      return this.tagName;
    },

    /**
     * Serializes the children of this Element back to markup.
     */
    get innerHTML() {
      function getHTML(node) {
        for (let i = 0; i < node.childNodes.length; i++) {
          let child = node.childNodes[i];
          if (child.localName) {
            arr.push("<" + child.localName);

            // serialize attribute list
            for (let j = 0; j < child.attributes.length; j++) {
              let attr = child.attributes[j];
              // Use single quotes only when the value itself contains a
              // double quote
              let quote = (attr.value.indexOf('"') === -1 ? '"' : "'");
              arr.push(" " + attr.name + '=' + quote + attr.value + quote);
            }

            if (child.localName in voidElems) {
              // if this is a self-closing element, end it here
              arr.push("/>");
            } else {
              // otherwise, add its children and the matching closing tag
              arr.push(">");
              getHTML(child);
              arr.push("</" + child.localName + ">");
            }
          } else {
            arr.push(child.textContent);
          }
        }
      }

      // Using Array.join() avoids the overhead from lazy string concatenation.
      // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
      let arr = [];
      getHTML(this);
      return arr.join("");
    },

    /**
     * Replaces the children of this Element with the nodes parsed from the
     * given markup string.
     */
    set innerHTML(html) {
      let parser = new JSDOMParser();
      let node = parser.parse(html);
      // Detach the existing children before adopting the parsed ones
      for (let i = this.childNodes.length; --i >= 0;) {
        this.childNodes[i].parentNode = null;
      }
      this.childNodes = node.childNodes;
      for (let i = this.childNodes.length; --i >= 0;) {
        this.childNodes[i].parentNode = this;
      }
    },

    set textContent(text) {
      // clear parentNodes for existing children
      for (let i = this.childNodes.length; --i >= 0;) {
        this.childNodes[i].parentNode = null;
      }

      let node = new Text();
      this.childNodes = [ node ];
      node.textContent = text;
      node.parentNode = this;
    },

    get textContent() {
      function getText(node) {
        let nodes = node.childNodes;
        for (let i = 0; i < nodes.length; i++) {
          let child = nodes[i];
          if (child.nodeType === 3) {
            text.push(child.textContent);
          } else {
            getText(child);
          }
        }
      }

      // Using Array.join() avoids the overhead from lazy string concatenation.
      // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
      let text = [];
      getText(this);
      return text.join("");
    },

    /**
     * @returns the attribute's value, or undefined if it is not set
     */
    getAttribute: function (name) {
      for (let i = this.attributes.length; --i >= 0;) {
        let attr = this.attributes[i];
        if (attr.name === name)
          return attr.value;
      }
      return undefined;
    },

    setAttribute: function (name, value) {
      for (let i = this.attributes.length; --i >= 0;) {
        let attr = this.attributes[i];
        if (attr.name === name) {
          attr.value = value;
          return;
        }
      }
      this.attributes.push(new Attribute(name, value));
    },

    removeAttribute: function (name) {
      for (let i = this.attributes.length; --i >= 0;) {
        let attr = this.attributes[i];
        if (attr.name === name) {
          this.attributes.splice(i, 1);
          break;
        }
      }
    }
  };

  let Style = function (node) {
    this.node = node;
  };

  // getStyle() and setStyle() use the style attribute string directly. This
  // won't be very efficient if there are a lot of style manipulations, but
  // it's the easiest way to make sure the style attribute string and the JS
  // style property stay in sync. Readability.js doesn't do many style
  // manipulations, so this should be okay.
  Style.prototype = {
    /**
     * Looks up a CSS property in the owning node's style attribute.
     *
     * @param styleName the CSS property name (e.g. "background-color")
     * @returns the trimmed value of the first matching declaration, or
     *          undefined if there is none
     */
    getStyle: function (styleName) {
      // getAttribute() returns the attribute's string value (or undefined),
      // not an Attribute object.
      let attr = this.node.getAttribute("style");
      if (!attr)
        return undefined;

      let styles = attr.split(";");
      for (let i = 0; i < styles.length; i++) {
        let decl = styles[i];
        // Split on the first ':' only, so values containing ':' stay intact
        let sep = decl.indexOf(":");
        if (sep === -1)
          continue;
        if (decl.substring(0, sep).trim() === styleName)
          return decl.substring(sep + 1).trim();
      }

      return undefined;
    },

    /**
     * Sets a CSS property in the owning node's style attribute. Any existing
     * declaration of the same property is dropped and the new declaration is
     * appended at the end.
     *
     * @param styleName  the CSS property name (e.g. "background-color")
     * @param styleValue the value to store
     */
    setStyle: function (styleName, styleValue) {
      let current = this.node.getAttribute("style") || "";
      let kept = [];
      let decls = current.split(";");
      for (let i = 0; i < decls.length; i++) {
        let decl = decls[i].trim();
        if (!decl)
          continue;
        let sep = decl.indexOf(":");
        let name = (sep === -1 ? decl : decl.substring(0, sep)).trim();
        if (name !== styleName)
          kept.push(decl + ";");
      }

      kept.push(styleName + ": " + styleValue + ";");
      this.node.setAttribute("style", kept.join(" "));
    }
  };

  // For each item in styleMap, define a getter and setter on the style
  // property.
  Object.keys(styleMap).forEach(function (jsName) {
    let cssName = styleMap[jsName];
    Object.defineProperty(Style.prototype, jsName, {
      configurable: true,
      enumerable: true,
      get: function () {
        return this.getStyle(cssName);
      },
      set: function (value) {
        this.setStyle(cssName, value);
      }
    });
  });

  let JSDOMParser = function () {
    this.currentChar = 0;
  };

  JSDOMParser.prototype = {
    /**
     * Look at the next character without advancing the index.
     */
    peekNext: function () {
      return this.html[this.currentChar];
    },

    /**
     * Get the next character and advance the index.
     */
    nextChar: function () {
      return this.html[this.currentChar++];
    },

    /**
     * Called after a quote character is read. This finds the next quote
     * character and returns the text string in between.
     *
     * @returns the string, or null if no closing quote was found (in which
     *          case the rest of the input is consumed)
     */
    readString: function (quote) {
      let str;
      let n = this.html.indexOf(quote, this.currentChar);
      if (n === -1) {
        this.currentChar = this.html.length;
        str = null;
      } else {
        str = this.html.substring(this.currentChar, n);
        this.currentChar = n + 1;
      }

      return str;
    },

    /**
     * Called when parsing a node. This finds the next name/value attribute
     * pair and adds the result to the attributes list. Attributes whose value
     * is empty or unterminated are not added.
     */
    readAttribute: function (node) {
      let name = "";

      let n = this.html.indexOf("=", this.currentChar);
      if (n === -1) {
        this.currentChar = this.html.length;
      } else {
        // Read until a '=' character is hit; this will be the attribute key
        name = this.html.substring(this.currentChar, n);
        this.currentChar = n + 1;
      }

      if (!name)
        return;

      // After a '=', we should see a '"' for the attribute value
      let c = this.nextChar();
      if (c !== '"' && c !== "'") {
        error("expecting '\"'");
        return;
      }

      // Read the attribute value (and consume the matching quote)
      let value = this.readString(c);

      if (!value)
        return;

      node.attributes.push(new Attribute(name, value));

      return;
    },

    /**
     * Parses and returns an Element node. This is called after a '<' has been
     * read.
     *
     * @returns an array; the first index of the array is the parsed node;
     *          the second index is a boolean indicating whether this is a void
     *          Element. Returns null if the tag could not be parsed.
     */
    makeElementNode: function () {
      let c = this.nextChar();

      // Read the Element tag name
      let tag = "";
      while (c !== " " && c !== ">" && c !== "/") {
        if (c === undefined)
          return null;
        tag += c;
        c = this.nextChar();
      }

      if (!tag)
        return null;

      let node = new Element(tag);

      // Read Element attributes
      while (c !== "/" && c !== ">") {
        if (c === undefined)
          return null;
        while (this.match(" "));
        c = this.nextChar();
        if (c !== "/" && c !== ">") {
          // Not the end of the tag: put the character back and read it as the
          // start of an attribute
          --this.currentChar;
          this.readAttribute(node);
        }
      }

      // If this is a self-closing tag, read '/>'
      let closed = tag in voidElems;
      if (c === "/") {
        closed = true;
        c = this.nextChar();
        if (c !== ">") {
          error("expected '>'");
          return null;
        }
      }

      return [node, closed];
    },

    /**
     * If the current input matches this string, advance the input index;
     * otherwise, do nothing.
     *
     * @returns whether input matched string
     */
    match: function (str) {
      let strlen = str.length;
      if (this.html.substr(this.currentChar, strlen) === str) {
        this.currentChar += strlen;
        return true;
      }
      return false;
    },

    /**
     * Searches the input until a string is found and discards all input up to
     * and including the matched string. If the string is not found, the rest
     * of the input is discarded.
     */
    discardTo: function (str) {
      // Test for "not found" before adding str.length; otherwise the index
      // would wrap to a small positive value and rewind the parser.
      let index = this.html.indexOf(str, this.currentChar);
      if (index === -1) {
        this.currentChar = this.html.length;
      } else {
        this.currentChar = index + str.length;
      }
    },

    /**
     * Reads child nodes for the given node.
     */
    readChildren: function (node) {
      let child;
      while ((child = this.readNode())) {
        // Don't keep Comment nodes
        if (child.nodeType !== 8) {
          node.childNodes.push(child);
          child.parentNode = node;
        }
      }
    },

    /**
     * Reads the next child node from the input. If we're reading a closing
     * tag, or if we've reached the end of input, return null.
     *
     * @returns the node
     */
    readNode: function () {
      let c = this.nextChar();

      if (c === undefined)
        return null;

      // Read any text as Text node
      if (c !== "<") {
        --this.currentChar;
        let node = new Text();
        let n = this.html.indexOf("<", this.currentChar);
        if (n === -1) {
          node.textContent = this.html.substring(this.currentChar, this.html.length);
          this.currentChar = this.html.length;
        } else {
          node.textContent = this.html.substring(this.currentChar, n);
          this.currentChar = n;
        }
        return node;
      }

      c = this.peekNext();

      // Read Comment node. Normally, Comment nodes know their inner
      // textContent, but we don't really care about Comment nodes (we throw
      // them away in readChildren()). So just returning an empty Comment node
      // here is sufficient.
      if (c === "!" || c === "?") {
        this.currentChar++;
        if (this.match("--")) {
          this.discardTo("-->");
        } else {
          // Declaration or processing instruction: skip to the closing '>',
          // stepping over quoted strings so a '>' inside them is ignored
          let ch = this.nextChar();
          while (ch !== ">") {
            if (ch === undefined)
              return null;
            if (ch === '"' || ch === "'")
              this.readString(ch);
            ch = this.nextChar();
          }
        }
        return new Comment();
      }

      // If we're reading a closing tag, return null. This means we've reached
      // the end of this set of child nodes.
      if (c === "/") {
        // Put the '<' back so the caller can match the whole closing tag
        --this.currentChar;
        return null;
      }

      // Otherwise, we're looking at an Element node
      let result = this.makeElementNode();
      if (result === null)
        return null;

      let [node, closed] = result;
      let localName = node.localName;

      // If this isn't a void Element, read its child nodes
      if (!closed) {
        this.readChildren(node);
        // Consume the matching closing tag so that the parent can go on
        // reading this element's following siblings
        let closingTag = "</" + localName + ">";
        if (!this.match(closingTag)) {
          error("expected '" + closingTag + "'");
          return null;
        }
      }

      if (localName === "title") {
        this.doc.title = node.textContent.trim();
      } else if (localName === "head") {
        this.doc.head = node;
      } else if (localName === "body") {
        this.doc.body = node;
      } else if (localName === "html") {
        this.doc.documentElement = node;
      }

      return node;
    },

    /**
     * Parses an HTML string and returns a JS implementation of the Document.
     */
    parse: function (html) {
      this.html = html;
      let doc = this.doc = new Document();
      this.readChildren(doc);

      // If this is an HTML document, remove root-level children except for the
      // <html> node
      if (doc.documentElement) {
        for (let i = doc.childNodes.length; --i >= 0;) {
          let child = doc.childNodes[i];
          if (child !== doc.documentElement) {
            doc.removeChild(child);
          }
        }
      }

      return doc;
    }
  };

  // Attach the standard DOM types to the global scope
  global.Node = Node;
  global.Comment = Comment;
  global.Document = Document;
  global.Element = Element;
  global.Text = Text;

  // Attach JSDOMParser to the global scope
  global.JSDOMParser = JSDOMParser;

// `this` is undefined when this file is evaluated as an ES module or inside a
// strict-mode wrapper; fall back to the real global object in that case.
}) (this || (typeof globalThis !== "undefined" ? globalThis : self));