content/base/src/nsHTMLContentSerializer.cpp

Thu, 15 Jan 2015 21:03:48 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 21:03:48 +0100
branch
TOR_BUG_9701
changeset 11
deefc01c0e14
permissions
-rw-r--r--

Integrate friendly tips from Tor colleagues to make (or not) 4.5 alpha 3;
this includes removal of overloaded (but unused) methods, and addition of
an overlooked call to DataStruct::SetData(nsISupports, uint32_t, bool).

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim: set ts=2 sw=2 et tw=80: */
michael@0 3 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 /*
michael@0 8 * nsIContentSerializer implementation that can be used with an
michael@0 9 * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
michael@0 10 * string that could be parsed into more or less the original DOM.
michael@0 11 */
michael@0 12
michael@0 13 #include "nsHTMLContentSerializer.h"
michael@0 14
michael@0 15 #include "nsIDOMElement.h"
michael@0 16 #include "nsIContent.h"
michael@0 17 #include "nsIDocument.h"
michael@0 18 #include "nsNameSpaceManager.h"
michael@0 19 #include "nsString.h"
michael@0 20 #include "nsUnicharUtils.h"
michael@0 21 #include "nsXPIDLString.h"
michael@0 22 #include "nsIServiceManager.h"
michael@0 23 #include "nsIDocumentEncoder.h"
michael@0 24 #include "nsGkAtoms.h"
michael@0 25 #include "nsIURI.h"
michael@0 26 #include "nsNetUtil.h"
michael@0 27 #include "nsEscape.h"
michael@0 28 #include "nsITextToSubURI.h"
michael@0 29 #include "nsCRT.h"
michael@0 30 #include "nsIParserService.h"
michael@0 31 #include "nsContentUtils.h"
michael@0 32 #include "nsLWBrkCIID.h"
michael@0 33 #include "nsIScriptElement.h"
michael@0 34 #include "nsAttrName.h"
michael@0 35 #include "nsIDocShell.h"
michael@0 36 #include "nsIEditor.h"
michael@0 37 #include "nsIHTMLEditor.h"
michael@0 38 #include "mozilla/dom/Element.h"
michael@0 39 #include "nsParserConstants.h"
michael@0 40
michael@0 41 using namespace mozilla::dom;
michael@0 42
michael@0 43 nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer)
michael@0 44 {
michael@0 45 nsHTMLContentSerializer* it = new nsHTMLContentSerializer();
michael@0 46 if (!it) {
michael@0 47 return NS_ERROR_OUT_OF_MEMORY;
michael@0 48 }
michael@0 49
michael@0 50 return CallQueryInterface(it, aSerializer);
michael@0 51 }
michael@0 52
michael@0 53 nsHTMLContentSerializer::nsHTMLContentSerializer()
michael@0 54 {
michael@0 55 mIsHTMLSerializer = true;
michael@0 56 }
michael@0 57
michael@0 58 nsHTMLContentSerializer::~nsHTMLContentSerializer()
michael@0 59 {
michael@0 60 }
michael@0 61
michael@0 62
michael@0 63 NS_IMETHODIMP
michael@0 64 nsHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument,
michael@0 65 nsAString& aStr)
michael@0 66 {
michael@0 67 return NS_OK;
michael@0 68 }
michael@0 69
michael@0 70 void
michael@0 71 nsHTMLContentSerializer::SerializeHTMLAttributes(nsIContent* aContent,
michael@0 72 nsIContent *aOriginalElement,
michael@0 73 nsAString& aTagPrefix,
michael@0 74 const nsAString& aTagNamespaceURI,
michael@0 75 nsIAtom* aTagName,
michael@0 76 int32_t aNamespace,
michael@0 77 nsAString& aStr)
michael@0 78 {
michael@0 79 int32_t count = aContent->GetAttrCount();
michael@0 80 if (!count)
michael@0 81 return;
michael@0 82
michael@0 83 nsresult rv;
michael@0 84 nsAutoString valueStr;
michael@0 85 NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
michael@0 86
michael@0 87 for (int32_t index = count; index > 0;) {
michael@0 88 --index;
michael@0 89 const nsAttrName* name = aContent->GetAttrNameAt(index);
michael@0 90 int32_t namespaceID = name->NamespaceID();
michael@0 91 nsIAtom* attrName = name->LocalName();
michael@0 92
michael@0 93 // Filter out any attribute starting with [-|_]moz
michael@0 94 nsDependentAtomString attrNameStr(attrName);
michael@0 95 if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) ||
michael@0 96 StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) {
michael@0 97 continue;
michael@0 98 }
michael@0 99 aContent->GetAttr(namespaceID, attrName, valueStr);
michael@0 100
michael@0 101 //
michael@0 102 // Filter out special case of <br type="_moz"> or <br _moz*>,
michael@0 103 // used by the editor. Bug 16988. Yuck.
michael@0 104 //
michael@0 105 if (aTagName == nsGkAtoms::br && aNamespace == kNameSpaceID_XHTML &&
michael@0 106 attrName == nsGkAtoms::type && namespaceID == kNameSpaceID_None &&
michael@0 107 StringBeginsWith(valueStr, _mozStr)) {
michael@0 108 continue;
michael@0 109 }
michael@0 110
michael@0 111 if (mIsCopying && mIsFirstChildOfOL &&
michael@0 112 aTagName == nsGkAtoms::li && aNamespace == kNameSpaceID_XHTML &&
michael@0 113 attrName == nsGkAtoms::value && namespaceID == kNameSpaceID_None){
michael@0 114 // This is handled separately in SerializeLIValueAttribute()
michael@0 115 continue;
michael@0 116 }
michael@0 117 bool isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
michael@0 118
michael@0 119 if (((attrName == nsGkAtoms::href &&
michael@0 120 (namespaceID == kNameSpaceID_None ||
michael@0 121 namespaceID == kNameSpaceID_XLink)) ||
michael@0 122 (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) {
michael@0 123 // Make all links absolute when converting only the selection:
michael@0 124 if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
michael@0 125 // Would be nice to handle OBJECT and APPLET tags,
michael@0 126 // but that gets more complicated since we have to
michael@0 127 // search the tag list for CODEBASE as well.
michael@0 128 // For now, just leave them relative.
michael@0 129 nsCOMPtr<nsIURI> uri = aContent->GetBaseURI();
michael@0 130 if (uri) {
michael@0 131 nsAutoString absURI;
michael@0 132 rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
michael@0 133 if (NS_SUCCEEDED(rv)) {
michael@0 134 valueStr = absURI;
michael@0 135 }
michael@0 136 }
michael@0 137 }
michael@0 138 // Need to escape URI.
michael@0 139 nsAutoString tempURI(valueStr);
michael@0 140 if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
michael@0 141 valueStr = tempURI;
michael@0 142 }
michael@0 143
michael@0 144 if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
michael@0 145 aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content
michael@0 146 && namespaceID == kNameSpaceID_None) {
michael@0 147 // If we're serializing a <meta http-equiv="content-type">,
michael@0 148 // use the proper value, rather than what's in the document.
michael@0 149 nsAutoString header;
michael@0 150 aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
michael@0 151 if (header.LowerCaseEqualsLiteral("content-type")) {
michael@0 152 valueStr = NS_LITERAL_STRING("text/html; charset=") +
michael@0 153 NS_ConvertASCIItoUTF16(mCharset);
michael@0 154 }
michael@0 155 }
michael@0 156
michael@0 157 nsDependentAtomString nameStr(attrName);
michael@0 158 nsAutoString prefix;
michael@0 159 if (namespaceID == kNameSpaceID_XML) {
michael@0 160 prefix.Assign(NS_LITERAL_STRING("xml"));
michael@0 161 } else if (namespaceID == kNameSpaceID_XLink) {
michael@0 162 prefix.Assign(NS_LITERAL_STRING("xlink"));
michael@0 163 }
michael@0 164
michael@0 165 // Expand shorthand attribute.
michael@0 166 if (aNamespace == kNameSpaceID_XHTML &&
michael@0 167 namespaceID == kNameSpaceID_None &&
michael@0 168 IsShorthandAttr(attrName, aTagName) &&
michael@0 169 valueStr.IsEmpty()) {
michael@0 170 valueStr = nameStr;
michael@0 171 }
michael@0 172 SerializeAttr(prefix, nameStr, valueStr, aStr, !isJS);
michael@0 173 }
michael@0 174 }
michael@0 175
michael@0 176 NS_IMETHODIMP
michael@0 177 nsHTMLContentSerializer::AppendElementStart(Element* aElement,
michael@0 178 Element* aOriginalElement,
michael@0 179 nsAString& aStr)
michael@0 180 {
michael@0 181 NS_ENSURE_ARG(aElement);
michael@0 182
michael@0 183 nsIContent* content = aElement;
michael@0 184
michael@0 185 bool forceFormat = false;
michael@0 186 if (!CheckElementStart(content, forceFormat, aStr)) {
michael@0 187 return NS_OK;
michael@0 188 }
michael@0 189
michael@0 190 nsIAtom *name = content->Tag();
michael@0 191 int32_t ns = content->GetNameSpaceID();
michael@0 192
michael@0 193 bool lineBreakBeforeOpen = LineBreakBeforeOpen(ns, name);
michael@0 194
michael@0 195 if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
michael@0 196 if (mColPos && lineBreakBeforeOpen) {
michael@0 197 AppendNewLineToString(aStr);
michael@0 198 }
michael@0 199 else {
michael@0 200 MaybeAddNewlineForRootNode(aStr);
michael@0 201 }
michael@0 202 if (!mColPos) {
michael@0 203 AppendIndentation(aStr);
michael@0 204 }
michael@0 205 else if (mAddSpace) {
michael@0 206 AppendToString(char16_t(' '), aStr);
michael@0 207 mAddSpace = false;
michael@0 208 }
michael@0 209 }
michael@0 210 else if (mAddSpace) {
michael@0 211 AppendToString(char16_t(' '), aStr);
michael@0 212 mAddSpace = false;
michael@0 213 }
michael@0 214 else {
michael@0 215 MaybeAddNewlineForRootNode(aStr);
michael@0 216 }
michael@0 217 // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode wasn't
michael@0 218 // called
michael@0 219 mAddNewlineForRootNode = false;
michael@0 220
michael@0 221 AppendToString(kLessThan, aStr);
michael@0 222
michael@0 223 AppendToString(nsDependentAtomString(name), aStr);
michael@0 224
michael@0 225 MaybeEnterInPreContent(content);
michael@0 226
michael@0 227 // for block elements, we increase the indentation
michael@0 228 if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw)
michael@0 229 IncrIndentation(name);
michael@0 230
michael@0 231 // Need to keep track of OL and LI elements in order to get ordinal number
michael@0 232 // for the LI.
michael@0 233 if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML){
michael@0 234 // We are copying and current node is an OL;
michael@0 235 // Store its start attribute value in olState->startVal.
michael@0 236 nsAutoString start;
michael@0 237 int32_t startAttrVal = 0;
michael@0 238
michael@0 239 aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start);
michael@0 240 if (!start.IsEmpty()){
michael@0 241 nsresult rv = NS_OK;
michael@0 242 startAttrVal = start.ToInteger(&rv);
michael@0 243 //If OL has "start" attribute, first LI element has to start with that value
michael@0 244 //Therefore subtracting 1 as all the LI elements are incrementing it before using it;
michael@0 245 //In failure of ToInteger(), default StartAttrValue to 0.
michael@0 246 if (NS_SUCCEEDED(rv))
michael@0 247 startAttrVal--;
michael@0 248 else
michael@0 249 startAttrVal = 0;
michael@0 250 }
michael@0 251 mOLStateStack.AppendElement(olState(startAttrVal, true));
michael@0 252 }
michael@0 253
michael@0 254 if (mIsCopying && name == nsGkAtoms::li && ns == kNameSpaceID_XHTML) {
michael@0 255 mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
michael@0 256 if (mIsFirstChildOfOL){
michael@0 257 // If OL is parent of this LI, serialize attributes in different manner.
michael@0 258 SerializeLIValueAttribute(aElement, aStr);
michael@0 259 }
michael@0 260 }
michael@0 261
michael@0 262 // Even LI passed above have to go through this
michael@0 263 // for serializing attributes other than "value".
michael@0 264 nsAutoString dummyPrefix;
michael@0 265 SerializeHTMLAttributes(content,
michael@0 266 aOriginalElement,
michael@0 267 dummyPrefix,
michael@0 268 EmptyString(),
michael@0 269 name,
michael@0 270 ns,
michael@0 271 aStr);
michael@0 272
michael@0 273 AppendToString(kGreaterThan, aStr);
michael@0 274
michael@0 275 if (ns == kNameSpaceID_XHTML &&
michael@0 276 (name == nsGkAtoms::script ||
michael@0 277 name == nsGkAtoms::style ||
michael@0 278 name == nsGkAtoms::noscript ||
michael@0 279 name == nsGkAtoms::noframes)) {
michael@0 280 ++mDisableEntityEncoding;
michael@0 281 }
michael@0 282
michael@0 283 if ((mDoFormat || forceFormat) && !mPreLevel &&
michael@0 284 !mDoRaw && LineBreakAfterOpen(ns, name)) {
michael@0 285 AppendNewLineToString(aStr);
michael@0 286 }
michael@0 287
michael@0 288 AfterElementStart(content, aOriginalElement, aStr);
michael@0 289
michael@0 290 return NS_OK;
michael@0 291 }
michael@0 292
michael@0 293 NS_IMETHODIMP
michael@0 294 nsHTMLContentSerializer::AppendElementEnd(Element* aElement,
michael@0 295 nsAString& aStr)
michael@0 296 {
michael@0 297 NS_ENSURE_ARG(aElement);
michael@0 298
michael@0 299 nsIContent* content = aElement;
michael@0 300
michael@0 301 nsIAtom *name = content->Tag();
michael@0 302 int32_t ns = content->GetNameSpaceID();
michael@0 303
michael@0 304 if (ns == kNameSpaceID_XHTML &&
michael@0 305 (name == nsGkAtoms::script ||
michael@0 306 name == nsGkAtoms::style ||
michael@0 307 name == nsGkAtoms::noscript ||
michael@0 308 name == nsGkAtoms::noframes)) {
michael@0 309 --mDisableEntityEncoding;
michael@0 310 }
michael@0 311
michael@0 312 bool forceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
michael@0 313 content->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
michael@0 314
michael@0 315 if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
michael@0 316 DecrIndentation(name);
michael@0 317 }
michael@0 318
michael@0 319 if (name == nsGkAtoms::script) {
michael@0 320 nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement);
michael@0 321
michael@0 322 if (script && script->IsMalformed()) {
michael@0 323 // We're looking at a malformed script tag. This means that the end tag
michael@0 324 // was missing in the source. Imitate that here by not serializing the end
michael@0 325 // tag.
michael@0 326 --mPreLevel;
michael@0 327 return NS_OK;
michael@0 328 }
michael@0 329 }
michael@0 330 else if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) {
michael@0 331 NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
michael@0 332 /* Though at this point we must always have an state to be deleted as all
michael@0 333 the OL opening tags are supposed to push an olState object to the stack*/
michael@0 334 if (!mOLStateStack.IsEmpty()) {
michael@0 335 mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1);
michael@0 336 }
michael@0 337 }
michael@0 338
michael@0 339 if (ns == kNameSpaceID_XHTML) {
michael@0 340 nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0 341
michael@0 342 if (parserService) {
michael@0 343 bool isContainer;
michael@0 344
michael@0 345 parserService->
michael@0 346 IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name),
michael@0 347 isContainer);
michael@0 348 if (!isContainer) {
michael@0 349 return NS_OK;
michael@0 350 }
michael@0 351 }
michael@0 352 }
michael@0 353
michael@0 354 if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
michael@0 355
michael@0 356 bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name);
michael@0 357
michael@0 358 if (mColPos && lineBreakBeforeClose) {
michael@0 359 AppendNewLineToString(aStr);
michael@0 360 }
michael@0 361 if (!mColPos) {
michael@0 362 AppendIndentation(aStr);
michael@0 363 }
michael@0 364 else if (mAddSpace) {
michael@0 365 AppendToString(char16_t(' '), aStr);
michael@0 366 mAddSpace = false;
michael@0 367 }
michael@0 368 }
michael@0 369 else if (mAddSpace) {
michael@0 370 AppendToString(char16_t(' '), aStr);
michael@0 371 mAddSpace = false;
michael@0 372 }
michael@0 373
michael@0 374 AppendToString(kEndTag, aStr);
michael@0 375 AppendToString(nsDependentAtomString(name), aStr);
michael@0 376 AppendToString(kGreaterThan, aStr);
michael@0 377
michael@0 378 MaybeLeaveFromPreContent(content);
michael@0 379
michael@0 380 if ((mDoFormat || forceFormat) && !mPreLevel
michael@0 381 && !mDoRaw && LineBreakAfterClose(ns, name)) {
michael@0 382 AppendNewLineToString(aStr);
michael@0 383 }
michael@0 384 else {
michael@0 385 MaybeFlagNewlineForRootNode(aElement);
michael@0 386 }
michael@0 387
michael@0 388 if (name == nsGkAtoms::body && ns == kNameSpaceID_XHTML) {
michael@0 389 --mInBody;
michael@0 390 }
michael@0 391
michael@0 392 return NS_OK;
michael@0 393 }
michael@0 394
michael@0 395 static const uint16_t kValNBSP = 160;
michael@0 396 static const char* kEntities[] = {
michael@0 397 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 398 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 399 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 400 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "&amp;", nullptr,
michael@0 401 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 402 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 403 "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 404 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 405 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 406 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 407 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 408 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 409 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 410 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 411 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 412 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 413 "&nbsp;"
michael@0 414 };
michael@0 415
michael@0 416 static const char* kAttrEntities[] = {
michael@0 417 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 418 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 419 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 420 nullptr, nullptr, nullptr, nullptr, "&quot;", nullptr, nullptr, nullptr, "&amp;", nullptr,
michael@0 421 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 422 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 423 "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 424 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 425 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 426 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 427 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 428 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 429 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 430 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 431 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 432 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0 433 "&nbsp;"
michael@0 434 };
michael@0 435
michael@0 436 uint32_t FindNextBasicEntity(const nsAString& aStr,
michael@0 437 const uint32_t aLen,
michael@0 438 uint32_t aIndex,
michael@0 439 const char** aEntityTable,
michael@0 440 const char** aEntity)
michael@0 441 {
michael@0 442 for (; aIndex < aLen; ++aIndex) {
michael@0 443 // for each character in this chunk, check if it
michael@0 444 // needs to be replaced
michael@0 445 char16_t val = aStr[aIndex];
michael@0 446 if (val <= kValNBSP && aEntityTable[val]) {
michael@0 447 *aEntity = aEntityTable[val];
michael@0 448 return aIndex;
michael@0 449 }
michael@0 450 }
michael@0 451 return aIndex;
michael@0 452 }
michael@0 453
michael@0 454 void
michael@0 455 nsHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
michael@0 456 nsAString& aOutputStr)
michael@0 457 {
michael@0 458 if (mBodyOnly && !mInBody) {
michael@0 459 return;
michael@0 460 }
michael@0 461
michael@0 462 if (mDisableEntityEncoding) {
michael@0 463 aOutputStr.Append(aStr);
michael@0 464 return;
michael@0 465 }
michael@0 466
michael@0 467 bool nonBasicEntities =
michael@0 468 !!(mFlags & (nsIDocumentEncoder::OutputEncodeLatin1Entities |
michael@0 469 nsIDocumentEncoder::OutputEncodeHTMLEntities |
michael@0 470 nsIDocumentEncoder::OutputEncodeW3CEntities));
michael@0 471
michael@0 472 if (!nonBasicEntities &&
michael@0 473 (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities))) {
michael@0 474 const char **entityTable = mInAttribute ? kAttrEntities : kEntities;
michael@0 475 uint32_t start = 0;
michael@0 476 const uint32_t len = aStr.Length();
michael@0 477 for (uint32_t i = 0; i < len; ++i) {
michael@0 478 const char* entity = nullptr;
michael@0 479 i = FindNextBasicEntity(aStr, len, i, entityTable, &entity);
michael@0 480 uint32_t normalTextLen = i - start;
michael@0 481 if (normalTextLen) {
michael@0 482 aOutputStr.Append(Substring(aStr, start, normalTextLen));
michael@0 483 }
michael@0 484 if (entity) {
michael@0 485 aOutputStr.AppendASCII(entity);
michael@0 486 start = i + 1;
michael@0 487 }
michael@0 488 }
michael@0 489 return;
michael@0 490 } else if (nonBasicEntities) {
michael@0 491 nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0 492
michael@0 493 if (!parserService) {
michael@0 494 NS_ERROR("Can't get parser service");
michael@0 495 return;
michael@0 496 }
michael@0 497
michael@0 498 nsReadingIterator<char16_t> done_reading;
michael@0 499 aStr.EndReading(done_reading);
michael@0 500
michael@0 501 // for each chunk of |aString|...
michael@0 502 uint32_t advanceLength = 0;
michael@0 503 nsReadingIterator<char16_t> iter;
michael@0 504
michael@0 505 const char **entityTable = mInAttribute ? kAttrEntities : kEntities;
michael@0 506 nsAutoCString entityReplacement;
michael@0 507
michael@0 508 for (aStr.BeginReading(iter);
michael@0 509 iter != done_reading;
michael@0 510 iter.advance(int32_t(advanceLength))) {
michael@0 511 uint32_t fragmentLength = iter.size_forward();
michael@0 512 uint32_t lengthReplaced = 0; // the number of UTF-16 codepoints
michael@0 513 // replaced by a particular entity
michael@0 514 const char16_t* c = iter.get();
michael@0 515 const char16_t* fragmentStart = c;
michael@0 516 const char16_t* fragmentEnd = c + fragmentLength;
michael@0 517 const char* entityText = nullptr;
michael@0 518 const char* fullConstEntityText = nullptr;
michael@0 519 char* fullEntityText = nullptr;
michael@0 520
michael@0 521 advanceLength = 0;
michael@0 522 // for each character in this chunk, check if it
michael@0 523 // needs to be replaced
michael@0 524 for (; c < fragmentEnd; c++, advanceLength++) {
michael@0 525 char16_t val = *c;
michael@0 526 if (val <= kValNBSP && entityTable[val]) {
michael@0 527 fullConstEntityText = entityTable[val];
michael@0 528 break;
michael@0 529 } else if (val > 127 &&
michael@0 530 ((val < 256 &&
michael@0 531 mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
michael@0 532 mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
michael@0 533 entityReplacement.Truncate();
michael@0 534 parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);
michael@0 535
michael@0 536 if (!entityReplacement.IsEmpty()) {
michael@0 537 entityText = entityReplacement.get();
michael@0 538 break;
michael@0 539 }
michael@0 540 }
michael@0 541 else if (val > 127 &&
michael@0 542 mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
michael@0 543 mEntityConverter) {
michael@0 544 if (NS_IS_HIGH_SURROGATE(val) &&
michael@0 545 c + 1 < fragmentEnd &&
michael@0 546 NS_IS_LOW_SURROGATE(*(c + 1))) {
michael@0 547 uint32_t valUTF32 = SURROGATE_TO_UCS4(val, *(++c));
michael@0 548 if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
michael@0 549 nsIEntityConverter::entityW3C, &fullEntityText))) {
michael@0 550 lengthReplaced = 2;
michael@0 551 break;
michael@0 552 }
michael@0 553 else {
michael@0 554 advanceLength++;
michael@0 555 }
michael@0 556 }
michael@0 557 else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
michael@0 558 nsIEntityConverter::entityW3C,
michael@0 559 &fullEntityText))) {
michael@0 560 lengthReplaced = 1;
michael@0 561 break;
michael@0 562 }
michael@0 563 }
michael@0 564 }
michael@0 565
michael@0 566 aOutputStr.Append(fragmentStart, advanceLength);
michael@0 567 if (entityText) {
michael@0 568 aOutputStr.Append(char16_t('&'));
michael@0 569 AppendASCIItoUTF16(entityText, aOutputStr);
michael@0 570 aOutputStr.Append(char16_t(';'));
michael@0 571 advanceLength++;
michael@0 572 }
michael@0 573 else if (fullConstEntityText) {
michael@0 574 aOutputStr.AppendASCII(fullConstEntityText);
michael@0 575 ++advanceLength;
michael@0 576 }
michael@0 577 // if it comes from nsIEntityConverter, it already has '&' and ';'
michael@0 578 else if (fullEntityText) {
michael@0 579 AppendASCIItoUTF16(fullEntityText, aOutputStr);
michael@0 580 nsMemory::Free(fullEntityText);
michael@0 581 advanceLength += lengthReplaced;
michael@0 582 }
michael@0 583 }
michael@0 584 } else {
michael@0 585 nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr);
michael@0 586 }
michael@0 587 }

mercurial