Thu, 15 Jan 2015 21:03:48 +0100
Integrate friendly tips from Tor colleagues to make (or not) 4.5 alpha 3;
This includes removal of overloaded (but unused) methods, and addition of
an overlooked call to DataStruct::SetData(nsISupports, uint32_t, bool).
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* vim: set ts=2 sw=2 et tw=80: */ |
michael@0 | 3 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | /* |
michael@0 | 8 | * nsIContentSerializer implementation that can be used with an |
michael@0 | 9 | * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML |
michael@0 | 10 | * string that could be parsed into more or less the original DOM. |
michael@0 | 11 | */ |
michael@0 | 12 | |
michael@0 | 13 | #include "nsHTMLContentSerializer.h" |
michael@0 | 14 | |
michael@0 | 15 | #include "nsIDOMElement.h" |
michael@0 | 16 | #include "nsIContent.h" |
michael@0 | 17 | #include "nsIDocument.h" |
michael@0 | 18 | #include "nsNameSpaceManager.h" |
michael@0 | 19 | #include "nsString.h" |
michael@0 | 20 | #include "nsUnicharUtils.h" |
michael@0 | 21 | #include "nsXPIDLString.h" |
michael@0 | 22 | #include "nsIServiceManager.h" |
michael@0 | 23 | #include "nsIDocumentEncoder.h" |
michael@0 | 24 | #include "nsGkAtoms.h" |
michael@0 | 25 | #include "nsIURI.h" |
michael@0 | 26 | #include "nsNetUtil.h" |
michael@0 | 27 | #include "nsEscape.h" |
michael@0 | 28 | #include "nsITextToSubURI.h" |
michael@0 | 29 | #include "nsCRT.h" |
michael@0 | 30 | #include "nsIParserService.h" |
michael@0 | 31 | #include "nsContentUtils.h" |
michael@0 | 32 | #include "nsLWBrkCIID.h" |
michael@0 | 33 | #include "nsIScriptElement.h" |
michael@0 | 34 | #include "nsAttrName.h" |
michael@0 | 35 | #include "nsIDocShell.h" |
michael@0 | 36 | #include "nsIEditor.h" |
michael@0 | 37 | #include "nsIHTMLEditor.h" |
michael@0 | 38 | #include "mozilla/dom/Element.h" |
michael@0 | 39 | #include "nsParserConstants.h" |
michael@0 | 40 | |
michael@0 | 41 | using namespace mozilla::dom; |
michael@0 | 42 | |
michael@0 | 43 | nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer) |
michael@0 | 44 | { |
michael@0 | 45 | nsHTMLContentSerializer* it = new nsHTMLContentSerializer(); |
michael@0 | 46 | if (!it) { |
michael@0 | 47 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 48 | } |
michael@0 | 49 | |
michael@0 | 50 | return CallQueryInterface(it, aSerializer); |
michael@0 | 51 | } |
michael@0 | 52 | |
michael@0 | 53 | nsHTMLContentSerializer::nsHTMLContentSerializer() |
michael@0 | 54 | { |
michael@0 | 55 | mIsHTMLSerializer = true; |
michael@0 | 56 | } |
michael@0 | 57 | |
michael@0 | 58 | nsHTMLContentSerializer::~nsHTMLContentSerializer() |
michael@0 | 59 | { |
michael@0 | 60 | } |
michael@0 | 61 | |
michael@0 | 62 | |
michael@0 | 63 | NS_IMETHODIMP |
michael@0 | 64 | nsHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument, |
michael@0 | 65 | nsAString& aStr) |
michael@0 | 66 | { |
michael@0 | 67 | return NS_OK; |
michael@0 | 68 | } |
michael@0 | 69 | |
michael@0 | 70 | void |
michael@0 | 71 | nsHTMLContentSerializer::SerializeHTMLAttributes(nsIContent* aContent, |
michael@0 | 72 | nsIContent *aOriginalElement, |
michael@0 | 73 | nsAString& aTagPrefix, |
michael@0 | 74 | const nsAString& aTagNamespaceURI, |
michael@0 | 75 | nsIAtom* aTagName, |
michael@0 | 76 | int32_t aNamespace, |
michael@0 | 77 | nsAString& aStr) |
michael@0 | 78 | { |
michael@0 | 79 | int32_t count = aContent->GetAttrCount(); |
michael@0 | 80 | if (!count) |
michael@0 | 81 | return; |
michael@0 | 82 | |
michael@0 | 83 | nsresult rv; |
michael@0 | 84 | nsAutoString valueStr; |
michael@0 | 85 | NS_NAMED_LITERAL_STRING(_mozStr, "_moz"); |
michael@0 | 86 | |
michael@0 | 87 | for (int32_t index = count; index > 0;) { |
michael@0 | 88 | --index; |
michael@0 | 89 | const nsAttrName* name = aContent->GetAttrNameAt(index); |
michael@0 | 90 | int32_t namespaceID = name->NamespaceID(); |
michael@0 | 91 | nsIAtom* attrName = name->LocalName(); |
michael@0 | 92 | |
michael@0 | 93 | // Filter out any attribute starting with [-|_]moz |
michael@0 | 94 | nsDependentAtomString attrNameStr(attrName); |
michael@0 | 95 | if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) || |
michael@0 | 96 | StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) { |
michael@0 | 97 | continue; |
michael@0 | 98 | } |
michael@0 | 99 | aContent->GetAttr(namespaceID, attrName, valueStr); |
michael@0 | 100 | |
michael@0 | 101 | // |
michael@0 | 102 | // Filter out special case of <br type="_moz"> or <br _moz*>, |
michael@0 | 103 | // used by the editor. Bug 16988. Yuck. |
michael@0 | 104 | // |
michael@0 | 105 | if (aTagName == nsGkAtoms::br && aNamespace == kNameSpaceID_XHTML && |
michael@0 | 106 | attrName == nsGkAtoms::type && namespaceID == kNameSpaceID_None && |
michael@0 | 107 | StringBeginsWith(valueStr, _mozStr)) { |
michael@0 | 108 | continue; |
michael@0 | 109 | } |
michael@0 | 110 | |
michael@0 | 111 | if (mIsCopying && mIsFirstChildOfOL && |
michael@0 | 112 | aTagName == nsGkAtoms::li && aNamespace == kNameSpaceID_XHTML && |
michael@0 | 113 | attrName == nsGkAtoms::value && namespaceID == kNameSpaceID_None){ |
michael@0 | 114 | // This is handled separately in SerializeLIValueAttribute() |
michael@0 | 115 | continue; |
michael@0 | 116 | } |
michael@0 | 117 | bool isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr); |
michael@0 | 118 | |
michael@0 | 119 | if (((attrName == nsGkAtoms::href && |
michael@0 | 120 | (namespaceID == kNameSpaceID_None || |
michael@0 | 121 | namespaceID == kNameSpaceID_XLink)) || |
michael@0 | 122 | (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) { |
michael@0 | 123 | // Make all links absolute when converting only the selection: |
michael@0 | 124 | if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) { |
michael@0 | 125 | // Would be nice to handle OBJECT and APPLET tags, |
michael@0 | 126 | // but that gets more complicated since we have to |
michael@0 | 127 | // search the tag list for CODEBASE as well. |
michael@0 | 128 | // For now, just leave them relative. |
michael@0 | 129 | nsCOMPtr<nsIURI> uri = aContent->GetBaseURI(); |
michael@0 | 130 | if (uri) { |
michael@0 | 131 | nsAutoString absURI; |
michael@0 | 132 | rv = NS_MakeAbsoluteURI(absURI, valueStr, uri); |
michael@0 | 133 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 134 | valueStr = absURI; |
michael@0 | 135 | } |
michael@0 | 136 | } |
michael@0 | 137 | } |
michael@0 | 138 | // Need to escape URI. |
michael@0 | 139 | nsAutoString tempURI(valueStr); |
michael@0 | 140 | if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr))) |
michael@0 | 141 | valueStr = tempURI; |
michael@0 | 142 | } |
michael@0 | 143 | |
michael@0 | 144 | if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta && |
michael@0 | 145 | aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content |
michael@0 | 146 | && namespaceID == kNameSpaceID_None) { |
michael@0 | 147 | // If we're serializing a <meta http-equiv="content-type">, |
michael@0 | 148 | // use the proper value, rather than what's in the document. |
michael@0 | 149 | nsAutoString header; |
michael@0 | 150 | aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header); |
michael@0 | 151 | if (header.LowerCaseEqualsLiteral("content-type")) { |
michael@0 | 152 | valueStr = NS_LITERAL_STRING("text/html; charset=") + |
michael@0 | 153 | NS_ConvertASCIItoUTF16(mCharset); |
michael@0 | 154 | } |
michael@0 | 155 | } |
michael@0 | 156 | |
michael@0 | 157 | nsDependentAtomString nameStr(attrName); |
michael@0 | 158 | nsAutoString prefix; |
michael@0 | 159 | if (namespaceID == kNameSpaceID_XML) { |
michael@0 | 160 | prefix.Assign(NS_LITERAL_STRING("xml")); |
michael@0 | 161 | } else if (namespaceID == kNameSpaceID_XLink) { |
michael@0 | 162 | prefix.Assign(NS_LITERAL_STRING("xlink")); |
michael@0 | 163 | } |
michael@0 | 164 | |
michael@0 | 165 | // Expand shorthand attribute. |
michael@0 | 166 | if (aNamespace == kNameSpaceID_XHTML && |
michael@0 | 167 | namespaceID == kNameSpaceID_None && |
michael@0 | 168 | IsShorthandAttr(attrName, aTagName) && |
michael@0 | 169 | valueStr.IsEmpty()) { |
michael@0 | 170 | valueStr = nameStr; |
michael@0 | 171 | } |
michael@0 | 172 | SerializeAttr(prefix, nameStr, valueStr, aStr, !isJS); |
michael@0 | 173 | } |
michael@0 | 174 | } |
michael@0 | 175 | |
michael@0 | 176 | NS_IMETHODIMP |
michael@0 | 177 | nsHTMLContentSerializer::AppendElementStart(Element* aElement, |
michael@0 | 178 | Element* aOriginalElement, |
michael@0 | 179 | nsAString& aStr) |
michael@0 | 180 | { |
michael@0 | 181 | NS_ENSURE_ARG(aElement); |
michael@0 | 182 | |
michael@0 | 183 | nsIContent* content = aElement; |
michael@0 | 184 | |
michael@0 | 185 | bool forceFormat = false; |
michael@0 | 186 | if (!CheckElementStart(content, forceFormat, aStr)) { |
michael@0 | 187 | return NS_OK; |
michael@0 | 188 | } |
michael@0 | 189 | |
michael@0 | 190 | nsIAtom *name = content->Tag(); |
michael@0 | 191 | int32_t ns = content->GetNameSpaceID(); |
michael@0 | 192 | |
michael@0 | 193 | bool lineBreakBeforeOpen = LineBreakBeforeOpen(ns, name); |
michael@0 | 194 | |
michael@0 | 195 | if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) { |
michael@0 | 196 | if (mColPos && lineBreakBeforeOpen) { |
michael@0 | 197 | AppendNewLineToString(aStr); |
michael@0 | 198 | } |
michael@0 | 199 | else { |
michael@0 | 200 | MaybeAddNewlineForRootNode(aStr); |
michael@0 | 201 | } |
michael@0 | 202 | if (!mColPos) { |
michael@0 | 203 | AppendIndentation(aStr); |
michael@0 | 204 | } |
michael@0 | 205 | else if (mAddSpace) { |
michael@0 | 206 | AppendToString(char16_t(' '), aStr); |
michael@0 | 207 | mAddSpace = false; |
michael@0 | 208 | } |
michael@0 | 209 | } |
michael@0 | 210 | else if (mAddSpace) { |
michael@0 | 211 | AppendToString(char16_t(' '), aStr); |
michael@0 | 212 | mAddSpace = false; |
michael@0 | 213 | } |
michael@0 | 214 | else { |
michael@0 | 215 | MaybeAddNewlineForRootNode(aStr); |
michael@0 | 216 | } |
michael@0 | 217 | // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode wasn't |
michael@0 | 218 | // called |
michael@0 | 219 | mAddNewlineForRootNode = false; |
michael@0 | 220 | |
michael@0 | 221 | AppendToString(kLessThan, aStr); |
michael@0 | 222 | |
michael@0 | 223 | AppendToString(nsDependentAtomString(name), aStr); |
michael@0 | 224 | |
michael@0 | 225 | MaybeEnterInPreContent(content); |
michael@0 | 226 | |
michael@0 | 227 | // for block elements, we increase the indentation |
michael@0 | 228 | if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) |
michael@0 | 229 | IncrIndentation(name); |
michael@0 | 230 | |
michael@0 | 231 | // Need to keep track of OL and LI elements in order to get ordinal number |
michael@0 | 232 | // for the LI. |
michael@0 | 233 | if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML){ |
michael@0 | 234 | // We are copying and current node is an OL; |
michael@0 | 235 | // Store its start attribute value in olState->startVal. |
michael@0 | 236 | nsAutoString start; |
michael@0 | 237 | int32_t startAttrVal = 0; |
michael@0 | 238 | |
michael@0 | 239 | aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start); |
michael@0 | 240 | if (!start.IsEmpty()){ |
michael@0 | 241 | nsresult rv = NS_OK; |
michael@0 | 242 | startAttrVal = start.ToInteger(&rv); |
michael@0 | 243 | //If OL has "start" attribute, first LI element has to start with that value |
michael@0 | 244 | //Therefore subtracting 1 as all the LI elements are incrementing it before using it; |
michael@0 | 245 | //In failure of ToInteger(), default StartAttrValue to 0. |
michael@0 | 246 | if (NS_SUCCEEDED(rv)) |
michael@0 | 247 | startAttrVal--; |
michael@0 | 248 | else |
michael@0 | 249 | startAttrVal = 0; |
michael@0 | 250 | } |
michael@0 | 251 | mOLStateStack.AppendElement(olState(startAttrVal, true)); |
michael@0 | 252 | } |
michael@0 | 253 | |
michael@0 | 254 | if (mIsCopying && name == nsGkAtoms::li && ns == kNameSpaceID_XHTML) { |
michael@0 | 255 | mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement); |
michael@0 | 256 | if (mIsFirstChildOfOL){ |
michael@0 | 257 | // If OL is parent of this LI, serialize attributes in different manner. |
michael@0 | 258 | SerializeLIValueAttribute(aElement, aStr); |
michael@0 | 259 | } |
michael@0 | 260 | } |
michael@0 | 261 | |
michael@0 | 262 | // Even LI passed above have to go through this |
michael@0 | 263 | // for serializing attributes other than "value". |
michael@0 | 264 | nsAutoString dummyPrefix; |
michael@0 | 265 | SerializeHTMLAttributes(content, |
michael@0 | 266 | aOriginalElement, |
michael@0 | 267 | dummyPrefix, |
michael@0 | 268 | EmptyString(), |
michael@0 | 269 | name, |
michael@0 | 270 | ns, |
michael@0 | 271 | aStr); |
michael@0 | 272 | |
michael@0 | 273 | AppendToString(kGreaterThan, aStr); |
michael@0 | 274 | |
michael@0 | 275 | if (ns == kNameSpaceID_XHTML && |
michael@0 | 276 | (name == nsGkAtoms::script || |
michael@0 | 277 | name == nsGkAtoms::style || |
michael@0 | 278 | name == nsGkAtoms::noscript || |
michael@0 | 279 | name == nsGkAtoms::noframes)) { |
michael@0 | 280 | ++mDisableEntityEncoding; |
michael@0 | 281 | } |
michael@0 | 282 | |
michael@0 | 283 | if ((mDoFormat || forceFormat) && !mPreLevel && |
michael@0 | 284 | !mDoRaw && LineBreakAfterOpen(ns, name)) { |
michael@0 | 285 | AppendNewLineToString(aStr); |
michael@0 | 286 | } |
michael@0 | 287 | |
michael@0 | 288 | AfterElementStart(content, aOriginalElement, aStr); |
michael@0 | 289 | |
michael@0 | 290 | return NS_OK; |
michael@0 | 291 | } |
michael@0 | 292 | |
michael@0 | 293 | NS_IMETHODIMP |
michael@0 | 294 | nsHTMLContentSerializer::AppendElementEnd(Element* aElement, |
michael@0 | 295 | nsAString& aStr) |
michael@0 | 296 | { |
michael@0 | 297 | NS_ENSURE_ARG(aElement); |
michael@0 | 298 | |
michael@0 | 299 | nsIContent* content = aElement; |
michael@0 | 300 | |
michael@0 | 301 | nsIAtom *name = content->Tag(); |
michael@0 | 302 | int32_t ns = content->GetNameSpaceID(); |
michael@0 | 303 | |
michael@0 | 304 | if (ns == kNameSpaceID_XHTML && |
michael@0 | 305 | (name == nsGkAtoms::script || |
michael@0 | 306 | name == nsGkAtoms::style || |
michael@0 | 307 | name == nsGkAtoms::noscript || |
michael@0 | 308 | name == nsGkAtoms::noframes)) { |
michael@0 | 309 | --mDisableEntityEncoding; |
michael@0 | 310 | } |
michael@0 | 311 | |
michael@0 | 312 | bool forceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) && |
michael@0 | 313 | content->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty); |
michael@0 | 314 | |
michael@0 | 315 | if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) { |
michael@0 | 316 | DecrIndentation(name); |
michael@0 | 317 | } |
michael@0 | 318 | |
michael@0 | 319 | if (name == nsGkAtoms::script) { |
michael@0 | 320 | nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement); |
michael@0 | 321 | |
michael@0 | 322 | if (script && script->IsMalformed()) { |
michael@0 | 323 | // We're looking at a malformed script tag. This means that the end tag |
michael@0 | 324 | // was missing in the source. Imitate that here by not serializing the end |
michael@0 | 325 | // tag. |
michael@0 | 326 | --mPreLevel; |
michael@0 | 327 | return NS_OK; |
michael@0 | 328 | } |
michael@0 | 329 | } |
michael@0 | 330 | else if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) { |
michael@0 | 331 | NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack"); |
michael@0 | 332 | /* Though at this point we must always have an state to be deleted as all |
michael@0 | 333 | the OL opening tags are supposed to push an olState object to the stack*/ |
michael@0 | 334 | if (!mOLStateStack.IsEmpty()) { |
michael@0 | 335 | mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1); |
michael@0 | 336 | } |
michael@0 | 337 | } |
michael@0 | 338 | |
michael@0 | 339 | if (ns == kNameSpaceID_XHTML) { |
michael@0 | 340 | nsIParserService* parserService = nsContentUtils::GetParserService(); |
michael@0 | 341 | |
michael@0 | 342 | if (parserService) { |
michael@0 | 343 | bool isContainer; |
michael@0 | 344 | |
michael@0 | 345 | parserService-> |
michael@0 | 346 | IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name), |
michael@0 | 347 | isContainer); |
michael@0 | 348 | if (!isContainer) { |
michael@0 | 349 | return NS_OK; |
michael@0 | 350 | } |
michael@0 | 351 | } |
michael@0 | 352 | } |
michael@0 | 353 | |
michael@0 | 354 | if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) { |
michael@0 | 355 | |
michael@0 | 356 | bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name); |
michael@0 | 357 | |
michael@0 | 358 | if (mColPos && lineBreakBeforeClose) { |
michael@0 | 359 | AppendNewLineToString(aStr); |
michael@0 | 360 | } |
michael@0 | 361 | if (!mColPos) { |
michael@0 | 362 | AppendIndentation(aStr); |
michael@0 | 363 | } |
michael@0 | 364 | else if (mAddSpace) { |
michael@0 | 365 | AppendToString(char16_t(' '), aStr); |
michael@0 | 366 | mAddSpace = false; |
michael@0 | 367 | } |
michael@0 | 368 | } |
michael@0 | 369 | else if (mAddSpace) { |
michael@0 | 370 | AppendToString(char16_t(' '), aStr); |
michael@0 | 371 | mAddSpace = false; |
michael@0 | 372 | } |
michael@0 | 373 | |
michael@0 | 374 | AppendToString(kEndTag, aStr); |
michael@0 | 375 | AppendToString(nsDependentAtomString(name), aStr); |
michael@0 | 376 | AppendToString(kGreaterThan, aStr); |
michael@0 | 377 | |
michael@0 | 378 | MaybeLeaveFromPreContent(content); |
michael@0 | 379 | |
michael@0 | 380 | if ((mDoFormat || forceFormat) && !mPreLevel |
michael@0 | 381 | && !mDoRaw && LineBreakAfterClose(ns, name)) { |
michael@0 | 382 | AppendNewLineToString(aStr); |
michael@0 | 383 | } |
michael@0 | 384 | else { |
michael@0 | 385 | MaybeFlagNewlineForRootNode(aElement); |
michael@0 | 386 | } |
michael@0 | 387 | |
michael@0 | 388 | if (name == nsGkAtoms::body && ns == kNameSpaceID_XHTML) { |
michael@0 | 389 | --mInBody; |
michael@0 | 390 | } |
michael@0 | 391 | |
michael@0 | 392 | return NS_OK; |
michael@0 | 393 | } |
michael@0 | 394 | |
// Highest code unit covered by the replacement tables below: U+00A0
// (no-break space). Both tables therefore hold kValNBSP + 1 == 161 entries.
static const uint16_t kValNBSP = 160;

// Replacement table for text content, indexed by UTF-16 code unit
// (0..kValNBSP). A null entry means "emit the character unchanged"; a
// non-null entry is the complete character reference, including the leading
// '&' and trailing ';', because callers append the entry verbatim.
static const char* kEntities[] = {
  /*   0 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  10 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  20 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  30 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "&amp;", nullptr,
  /*  40 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  50 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  60 */ "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  70 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  80 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  90 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 100 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 110 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 120 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 130 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 140 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 150 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 160 */ "&nbsp;"
};

// Replacement table for attribute values. Same layout and contract as
// kEntities, with the double quote (U+0022) additionally replaced.
static const char* kAttrEntities[] = {
  /*   0 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  10 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  20 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  30 */ nullptr, nullptr, nullptr, nullptr, "&quot;", nullptr, nullptr, nullptr, "&amp;", nullptr,
  /*  40 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  50 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  60 */ "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  70 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  80 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  90 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 100 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 110 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 120 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 130 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 140 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 150 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 160 */ "&nbsp;"
};
michael@0 | 435 | |
michael@0 | 436 | uint32_t FindNextBasicEntity(const nsAString& aStr, |
michael@0 | 437 | const uint32_t aLen, |
michael@0 | 438 | uint32_t aIndex, |
michael@0 | 439 | const char** aEntityTable, |
michael@0 | 440 | const char** aEntity) |
michael@0 | 441 | { |
michael@0 | 442 | for (; aIndex < aLen; ++aIndex) { |
michael@0 | 443 | // for each character in this chunk, check if it |
michael@0 | 444 | // needs to be replaced |
michael@0 | 445 | char16_t val = aStr[aIndex]; |
michael@0 | 446 | if (val <= kValNBSP && aEntityTable[val]) { |
michael@0 | 447 | *aEntity = aEntityTable[val]; |
michael@0 | 448 | return aIndex; |
michael@0 | 449 | } |
michael@0 | 450 | } |
michael@0 | 451 | return aIndex; |
michael@0 | 452 | } |
michael@0 | 453 | |
michael@0 | 454 | void |
michael@0 | 455 | nsHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr, |
michael@0 | 456 | nsAString& aOutputStr) |
michael@0 | 457 | { |
michael@0 | 458 | if (mBodyOnly && !mInBody) { |
michael@0 | 459 | return; |
michael@0 | 460 | } |
michael@0 | 461 | |
michael@0 | 462 | if (mDisableEntityEncoding) { |
michael@0 | 463 | aOutputStr.Append(aStr); |
michael@0 | 464 | return; |
michael@0 | 465 | } |
michael@0 | 466 | |
michael@0 | 467 | bool nonBasicEntities = |
michael@0 | 468 | !!(mFlags & (nsIDocumentEncoder::OutputEncodeLatin1Entities | |
michael@0 | 469 | nsIDocumentEncoder::OutputEncodeHTMLEntities | |
michael@0 | 470 | nsIDocumentEncoder::OutputEncodeW3CEntities)); |
michael@0 | 471 | |
michael@0 | 472 | if (!nonBasicEntities && |
michael@0 | 473 | (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities))) { |
michael@0 | 474 | const char **entityTable = mInAttribute ? kAttrEntities : kEntities; |
michael@0 | 475 | uint32_t start = 0; |
michael@0 | 476 | const uint32_t len = aStr.Length(); |
michael@0 | 477 | for (uint32_t i = 0; i < len; ++i) { |
michael@0 | 478 | const char* entity = nullptr; |
michael@0 | 479 | i = FindNextBasicEntity(aStr, len, i, entityTable, &entity); |
michael@0 | 480 | uint32_t normalTextLen = i - start; |
michael@0 | 481 | if (normalTextLen) { |
michael@0 | 482 | aOutputStr.Append(Substring(aStr, start, normalTextLen)); |
michael@0 | 483 | } |
michael@0 | 484 | if (entity) { |
michael@0 | 485 | aOutputStr.AppendASCII(entity); |
michael@0 | 486 | start = i + 1; |
michael@0 | 487 | } |
michael@0 | 488 | } |
michael@0 | 489 | return; |
michael@0 | 490 | } else if (nonBasicEntities) { |
michael@0 | 491 | nsIParserService* parserService = nsContentUtils::GetParserService(); |
michael@0 | 492 | |
michael@0 | 493 | if (!parserService) { |
michael@0 | 494 | NS_ERROR("Can't get parser service"); |
michael@0 | 495 | return; |
michael@0 | 496 | } |
michael@0 | 497 | |
michael@0 | 498 | nsReadingIterator<char16_t> done_reading; |
michael@0 | 499 | aStr.EndReading(done_reading); |
michael@0 | 500 | |
michael@0 | 501 | // for each chunk of |aString|... |
michael@0 | 502 | uint32_t advanceLength = 0; |
michael@0 | 503 | nsReadingIterator<char16_t> iter; |
michael@0 | 504 | |
michael@0 | 505 | const char **entityTable = mInAttribute ? kAttrEntities : kEntities; |
michael@0 | 506 | nsAutoCString entityReplacement; |
michael@0 | 507 | |
michael@0 | 508 | for (aStr.BeginReading(iter); |
michael@0 | 509 | iter != done_reading; |
michael@0 | 510 | iter.advance(int32_t(advanceLength))) { |
michael@0 | 511 | uint32_t fragmentLength = iter.size_forward(); |
michael@0 | 512 | uint32_t lengthReplaced = 0; // the number of UTF-16 codepoints |
michael@0 | 513 | // replaced by a particular entity |
michael@0 | 514 | const char16_t* c = iter.get(); |
michael@0 | 515 | const char16_t* fragmentStart = c; |
michael@0 | 516 | const char16_t* fragmentEnd = c + fragmentLength; |
michael@0 | 517 | const char* entityText = nullptr; |
michael@0 | 518 | const char* fullConstEntityText = nullptr; |
michael@0 | 519 | char* fullEntityText = nullptr; |
michael@0 | 520 | |
michael@0 | 521 | advanceLength = 0; |
michael@0 | 522 | // for each character in this chunk, check if it |
michael@0 | 523 | // needs to be replaced |
michael@0 | 524 | for (; c < fragmentEnd; c++, advanceLength++) { |
michael@0 | 525 | char16_t val = *c; |
michael@0 | 526 | if (val <= kValNBSP && entityTable[val]) { |
michael@0 | 527 | fullConstEntityText = entityTable[val]; |
michael@0 | 528 | break; |
michael@0 | 529 | } else if (val > 127 && |
michael@0 | 530 | ((val < 256 && |
michael@0 | 531 | mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) || |
michael@0 | 532 | mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) { |
michael@0 | 533 | entityReplacement.Truncate(); |
michael@0 | 534 | parserService->HTMLConvertUnicodeToEntity(val, entityReplacement); |
michael@0 | 535 | |
michael@0 | 536 | if (!entityReplacement.IsEmpty()) { |
michael@0 | 537 | entityText = entityReplacement.get(); |
michael@0 | 538 | break; |
michael@0 | 539 | } |
michael@0 | 540 | } |
michael@0 | 541 | else if (val > 127 && |
michael@0 | 542 | mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities && |
michael@0 | 543 | mEntityConverter) { |
michael@0 | 544 | if (NS_IS_HIGH_SURROGATE(val) && |
michael@0 | 545 | c + 1 < fragmentEnd && |
michael@0 | 546 | NS_IS_LOW_SURROGATE(*(c + 1))) { |
michael@0 | 547 | uint32_t valUTF32 = SURROGATE_TO_UCS4(val, *(++c)); |
michael@0 | 548 | if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32, |
michael@0 | 549 | nsIEntityConverter::entityW3C, &fullEntityText))) { |
michael@0 | 550 | lengthReplaced = 2; |
michael@0 | 551 | break; |
michael@0 | 552 | } |
michael@0 | 553 | else { |
michael@0 | 554 | advanceLength++; |
michael@0 | 555 | } |
michael@0 | 556 | } |
michael@0 | 557 | else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val, |
michael@0 | 558 | nsIEntityConverter::entityW3C, |
michael@0 | 559 | &fullEntityText))) { |
michael@0 | 560 | lengthReplaced = 1; |
michael@0 | 561 | break; |
michael@0 | 562 | } |
michael@0 | 563 | } |
michael@0 | 564 | } |
michael@0 | 565 | |
michael@0 | 566 | aOutputStr.Append(fragmentStart, advanceLength); |
michael@0 | 567 | if (entityText) { |
michael@0 | 568 | aOutputStr.Append(char16_t('&')); |
michael@0 | 569 | AppendASCIItoUTF16(entityText, aOutputStr); |
michael@0 | 570 | aOutputStr.Append(char16_t(';')); |
michael@0 | 571 | advanceLength++; |
michael@0 | 572 | } |
michael@0 | 573 | else if (fullConstEntityText) { |
michael@0 | 574 | aOutputStr.AppendASCII(fullConstEntityText); |
michael@0 | 575 | ++advanceLength; |
michael@0 | 576 | } |
michael@0 | 577 | // if it comes from nsIEntityConverter, it already has '&' and ';' |
michael@0 | 578 | else if (fullEntityText) { |
michael@0 | 579 | AppendASCIItoUTF16(fullEntityText, aOutputStr); |
michael@0 | 580 | nsMemory::Free(fullEntityText); |
michael@0 | 581 | advanceLength += lengthReplaced; |
michael@0 | 582 | } |
michael@0 | 583 | } |
michael@0 | 584 | } else { |
michael@0 | 585 | nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr); |
michael@0 | 586 | } |
michael@0 | 587 | } |