content/base/src/nsXHTMLContentSerializer.cpp

Thu, 15 Jan 2015 21:03:48 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 21:03:48 +0100
branch
TOR_BUG_9701
changeset 11
deefc01c0e14
permissions
-rw-r--r--

Integrate friendly tips from Tor colleagues to make (or not) 4.5 alpha 3.
This includes removal of overloaded (but unused) methods, and addition of
an overlooked call to DataStruct::SetData(nsISupports, uint32_t, bool).

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim: set ts=2 sw=2 et tw=80: */
michael@0 3 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 /*
michael@0 8 * nsIContentSerializer implementation that can be used with an
michael@0 9 * nsIDocumentEncoder to convert an XHTML (not HTML!) DOM to an XHTML
michael@0 10 * string that could be parsed into more or less the original DOM.
michael@0 11 */
michael@0 12
michael@0 13 #include "nsXHTMLContentSerializer.h"
michael@0 14
michael@0 15 #include "nsIDOMElement.h"
michael@0 16 #include "nsIContent.h"
michael@0 17 #include "nsIDocument.h"
michael@0 18 #include "nsNameSpaceManager.h"
michael@0 19 #include "nsString.h"
michael@0 20 #include "nsUnicharUtils.h"
michael@0 21 #include "nsXPIDLString.h"
michael@0 22 #include "nsIServiceManager.h"
michael@0 23 #include "nsIDocumentEncoder.h"
michael@0 24 #include "nsGkAtoms.h"
michael@0 25 #include "nsIURI.h"
michael@0 26 #include "nsNetUtil.h"
michael@0 27 #include "nsEscape.h"
michael@0 28 #include "nsITextToSubURI.h"
michael@0 29 #include "nsCRT.h"
michael@0 30 #include "nsIParserService.h"
michael@0 31 #include "nsContentUtils.h"
michael@0 32 #include "nsLWBrkCIID.h"
michael@0 33 #include "nsIScriptElement.h"
michael@0 34 #include "nsAttrName.h"
michael@0 35 #include "nsParserConstants.h"
michael@0 36
michael@0 37 static const int32_t kLongLineLen = 128;
michael@0 38
michael@0 39 #define kXMLNS "xmlns"
michael@0 40
michael@0 41 nsresult NS_NewXHTMLContentSerializer(nsIContentSerializer** aSerializer)
michael@0 42 {
michael@0 43 nsXHTMLContentSerializer* it = new nsXHTMLContentSerializer();
michael@0 44 if (!it) {
michael@0 45 return NS_ERROR_OUT_OF_MEMORY;
michael@0 46 }
michael@0 47
michael@0 48 return CallQueryInterface(it, aSerializer);
michael@0 49 }
michael@0 50
michael@0 51 nsXHTMLContentSerializer::nsXHTMLContentSerializer()
michael@0 52 : mIsHTMLSerializer(false)
michael@0 53 {
michael@0 54 }
michael@0 55
michael@0 56 nsXHTMLContentSerializer::~nsXHTMLContentSerializer()
michael@0 57 {
michael@0 58 NS_ASSERTION(mOLStateStack.IsEmpty(), "Expected OL State stack to be empty");
michael@0 59 }
michael@0 60
michael@0 61 NS_IMETHODIMP
michael@0 62 nsXHTMLContentSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
michael@0 63 const char* aCharSet, bool aIsCopying,
michael@0 64 bool aRewriteEncodingDeclaration)
michael@0 65 {
michael@0 66 // The previous version of the HTML serializer did implicit wrapping
michael@0 67 // when there is no flags, so we keep wrapping in order to keep
michael@0 68 // compatibility with the existing calling code
michael@0 69 // XXXLJ perhaps should we remove this default settings later ?
michael@0 70 if (aFlags & nsIDocumentEncoder::OutputFormatted ) {
michael@0 71 aFlags = aFlags | nsIDocumentEncoder::OutputWrap;
michael@0 72 }
michael@0 73
michael@0 74 nsresult rv;
michael@0 75 rv = nsXMLContentSerializer::Init(aFlags, aWrapColumn, aCharSet, aIsCopying, aRewriteEncodingDeclaration);
michael@0 76 NS_ENSURE_SUCCESS(rv, rv);
michael@0 77
michael@0 78 mRewriteEncodingDeclaration = aRewriteEncodingDeclaration;
michael@0 79 mIsCopying = aIsCopying;
michael@0 80 mIsFirstChildOfOL = false;
michael@0 81 mInBody = 0;
michael@0 82 mDisableEntityEncoding = 0;
michael@0 83 mBodyOnly = (mFlags & nsIDocumentEncoder::OutputBodyOnly) ? true
michael@0 84 : false;
michael@0 85
michael@0 86 // set up entity converter if we are going to need it
michael@0 87 if (mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities) {
michael@0 88 mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID);
michael@0 89 }
michael@0 90 return NS_OK;
michael@0 91 }
michael@0 92
michael@0 93
michael@0 94 // See if the string has any lines longer than longLineLen:
michael@0 95 // if so, we presume formatting is wonky (e.g. the node has been edited)
michael@0 96 // and we'd better rewrap the whole text node.
michael@0 97 bool
michael@0 98 nsXHTMLContentSerializer::HasLongLines(const nsString& text, int32_t& aLastNewlineOffset)
michael@0 99 {
michael@0 100 uint32_t start=0;
michael@0 101 uint32_t theLen = text.Length();
michael@0 102 bool rv = false;
michael@0 103 aLastNewlineOffset = kNotFound;
michael@0 104 for (start = 0; start < theLen; ) {
michael@0 105 int32_t eol = text.FindChar('\n', start);
michael@0 106 if (eol < 0) {
michael@0 107 eol = text.Length();
michael@0 108 }
michael@0 109 else {
michael@0 110 aLastNewlineOffset = eol;
michael@0 111 }
michael@0 112 if (int32_t(eol - start) > kLongLineLen)
michael@0 113 rv = true;
michael@0 114 start = eol + 1;
michael@0 115 }
michael@0 116 return rv;
michael@0 117 }
michael@0 118
michael@0 119 NS_IMETHODIMP
michael@0 120 nsXHTMLContentSerializer::AppendText(nsIContent* aText,
michael@0 121 int32_t aStartOffset,
michael@0 122 int32_t aEndOffset,
michael@0 123 nsAString& aStr)
michael@0 124 {
michael@0 125 NS_ENSURE_ARG(aText);
michael@0 126
michael@0 127 nsAutoString data;
michael@0 128 nsresult rv;
michael@0 129
michael@0 130 rv = AppendTextData(aText, aStartOffset, aEndOffset, data, true);
michael@0 131 if (NS_FAILED(rv))
michael@0 132 return NS_ERROR_FAILURE;
michael@0 133
michael@0 134 if (mPreLevel > 0 || mDoRaw) {
michael@0 135 AppendToStringConvertLF(data, aStr);
michael@0 136 }
michael@0 137 else if (mDoFormat) {
michael@0 138 AppendToStringFormatedWrapped(data, aStr);
michael@0 139 }
michael@0 140 else if (mDoWrap) {
michael@0 141 AppendToStringWrapped(data, aStr);
michael@0 142 }
michael@0 143 else {
michael@0 144 int32_t lastNewlineOffset = kNotFound;
michael@0 145 if (HasLongLines(data, lastNewlineOffset)) {
michael@0 146 // We have long lines, rewrap
michael@0 147 mDoWrap = true;
michael@0 148 AppendToStringWrapped(data, aStr);
michael@0 149 mDoWrap = false;
michael@0 150 }
michael@0 151 else {
michael@0 152 AppendToStringConvertLF(data, aStr);
michael@0 153 }
michael@0 154 }
michael@0 155
michael@0 156 return NS_OK;
michael@0 157 }
michael@0 158
michael@0 159 nsresult
michael@0 160 nsXHTMLContentSerializer::EscapeURI(nsIContent* aContent, const nsAString& aURI, nsAString& aEscapedURI)
michael@0 161 {
michael@0 162 // URL escape %xx cannot be used in JS.
michael@0 163 // No escaping if the scheme is 'javascript'.
michael@0 164 if (IsJavaScript(aContent, nsGkAtoms::href, kNameSpaceID_None, aURI)) {
michael@0 165 aEscapedURI = aURI;
michael@0 166 return NS_OK;
michael@0 167 }
michael@0 168
michael@0 169 // nsITextToSubURI does charset convert plus uri escape
michael@0 170 // This is needed to convert to a document charset which is needed to support existing browsers.
michael@0 171 // But we eventually want to use UTF-8 instead of a document charset, then the code would be much simpler.
michael@0 172 // See HTML 4.01 spec, "Appendix B.2.1 Non-ASCII characters in URI attribute values"
michael@0 173 nsCOMPtr<nsITextToSubURI> textToSubURI;
michael@0 174 nsAutoString uri(aURI); // in order to use FindCharInSet()
michael@0 175 nsresult rv = NS_OK;
michael@0 176
michael@0 177 if (!mCharset.IsEmpty() && !IsASCII(uri)) {
michael@0 178 textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv);
michael@0 179 NS_ENSURE_SUCCESS(rv, rv);
michael@0 180 }
michael@0 181
michael@0 182 int32_t start = 0;
michael@0 183 int32_t end;
michael@0 184 nsAutoString part;
michael@0 185 nsXPIDLCString escapedURI;
michael@0 186 aEscapedURI.Truncate(0);
michael@0 187
michael@0 188 // Loop and escape parts by avoiding escaping reserved characters
michael@0 189 // (and '%', '#', as well as '[' and ']' for IPv6 address literals).
michael@0 190 while ((end = uri.FindCharInSet("%#;/?:@&=+$,[]", start)) != -1) {
michael@0 191 part = Substring(aURI, start, (end-start));
michael@0 192 if (textToSubURI && !IsASCII(part)) {
michael@0 193 rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
michael@0 194 NS_ENSURE_SUCCESS(rv, rv);
michael@0 195 }
michael@0 196 else {
michael@0 197 escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path));
michael@0 198 }
michael@0 199 AppendASCIItoUTF16(escapedURI, aEscapedURI);
michael@0 200
michael@0 201 // Append a reserved character without escaping.
michael@0 202 part = Substring(aURI, end, 1);
michael@0 203 aEscapedURI.Append(part);
michael@0 204 start = end + 1;
michael@0 205 }
michael@0 206
michael@0 207 if (start < (int32_t) aURI.Length()) {
michael@0 208 // Escape the remaining part.
michael@0 209 part = Substring(aURI, start, aURI.Length()-start);
michael@0 210 if (textToSubURI) {
michael@0 211 rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
michael@0 212 NS_ENSURE_SUCCESS(rv, rv);
michael@0 213 }
michael@0 214 else {
michael@0 215 escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path));
michael@0 216 }
michael@0 217 AppendASCIItoUTF16(escapedURI, aEscapedURI);
michael@0 218 }
michael@0 219
michael@0 220 return rv;
michael@0 221 }
michael@0 222
michael@0 223 void
michael@0 224 nsXHTMLContentSerializer::SerializeAttributes(nsIContent* aContent,
michael@0 225 nsIContent *aOriginalElement,
michael@0 226 nsAString& aTagPrefix,
michael@0 227 const nsAString& aTagNamespaceURI,
michael@0 228 nsIAtom* aTagName,
michael@0 229 nsAString& aStr,
michael@0 230 uint32_t aSkipAttr,
michael@0 231 bool aAddNSAttr)
michael@0 232 {
michael@0 233 nsresult rv;
michael@0 234 uint32_t index, count;
michael@0 235 nsAutoString prefixStr, uriStr, valueStr;
michael@0 236 nsAutoString xmlnsStr;
michael@0 237 xmlnsStr.AssignLiteral(kXMLNS);
michael@0 238
michael@0 239 int32_t contentNamespaceID = aContent->GetNameSpaceID();
michael@0 240
michael@0 241 // this method is not called by nsHTMLContentSerializer
michael@0 242 // so we don't have to check HTML element, just XHTML
michael@0 243
michael@0 244 if (mIsCopying && kNameSpaceID_XHTML == contentNamespaceID) {
michael@0 245
michael@0 246 // Need to keep track of OL and LI elements in order to get ordinal number
michael@0 247 // for the LI.
michael@0 248 if (aTagName == nsGkAtoms::ol) {
michael@0 249 // We are copying and current node is an OL;
michael@0 250 // Store its start attribute value in olState->startVal.
michael@0 251 nsAutoString start;
michael@0 252 int32_t startAttrVal = 0;
michael@0 253 aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start);
michael@0 254 if (!start.IsEmpty()) {
michael@0 255 nsresult rv = NS_OK;
michael@0 256 startAttrVal = start.ToInteger(&rv);
michael@0 257 //If OL has "start" attribute, first LI element has to start with that value
michael@0 258 //Therefore subtracting 1 as all the LI elements are incrementing it before using it;
michael@0 259 //In failure of ToInteger(), default StartAttrValue to 0.
michael@0 260 if (NS_SUCCEEDED(rv))
michael@0 261 --startAttrVal;
michael@0 262 else
michael@0 263 startAttrVal = 0;
michael@0 264 }
michael@0 265 olState state (startAttrVal, true);
michael@0 266 mOLStateStack.AppendElement(state);
michael@0 267 }
michael@0 268 else if (aTagName == nsGkAtoms::li) {
michael@0 269 mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
michael@0 270 if (mIsFirstChildOfOL) {
michael@0 271 // If OL is parent of this LI, serialize attributes in different manner.
michael@0 272 SerializeLIValueAttribute(aContent, aStr);
michael@0 273 }
michael@0 274 }
michael@0 275 }
michael@0 276
michael@0 277 // If we had to add a new namespace declaration, serialize
michael@0 278 // and push it on the namespace stack
michael@0 279 if (aAddNSAttr) {
michael@0 280 if (aTagPrefix.IsEmpty()) {
michael@0 281 // Serialize default namespace decl
michael@0 282 SerializeAttr(EmptyString(), xmlnsStr, aTagNamespaceURI, aStr, true);
michael@0 283 } else {
michael@0 284 // Serialize namespace decl
michael@0 285 SerializeAttr(xmlnsStr, aTagPrefix, aTagNamespaceURI, aStr, true);
michael@0 286 }
michael@0 287 PushNameSpaceDecl(aTagPrefix, aTagNamespaceURI, aOriginalElement);
michael@0 288 }
michael@0 289
michael@0 290 NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
michael@0 291
michael@0 292 count = aContent->GetAttrCount();
michael@0 293
michael@0 294 // Now serialize each of the attributes
michael@0 295 // XXX Unfortunately we need a namespace manager to get
michael@0 296 // attribute URIs.
michael@0 297 for (index = 0; index < count; index++) {
michael@0 298
michael@0 299 if (aSkipAttr == index) {
michael@0 300 continue;
michael@0 301 }
michael@0 302
michael@0 303 const nsAttrName* name = aContent->GetAttrNameAt(index);
michael@0 304 int32_t namespaceID = name->NamespaceID();
michael@0 305 nsIAtom* attrName = name->LocalName();
michael@0 306 nsIAtom* attrPrefix = name->GetPrefix();
michael@0 307
michael@0 308 // Filter out any attribute starting with [-|_]moz
michael@0 309 nsDependentAtomString attrNameStr(attrName);
michael@0 310 if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) ||
michael@0 311 StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) {
michael@0 312 continue;
michael@0 313 }
michael@0 314
michael@0 315 if (attrPrefix) {
michael@0 316 attrPrefix->ToString(prefixStr);
michael@0 317 }
michael@0 318 else {
michael@0 319 prefixStr.Truncate();
michael@0 320 }
michael@0 321
michael@0 322 bool addNSAttr = false;
michael@0 323 if (kNameSpaceID_XMLNS != namespaceID) {
michael@0 324 nsContentUtils::NameSpaceManager()->GetNameSpaceURI(namespaceID, uriStr);
michael@0 325 addNSAttr = ConfirmPrefix(prefixStr, uriStr, aOriginalElement, true);
michael@0 326 }
michael@0 327
michael@0 328 aContent->GetAttr(namespaceID, attrName, valueStr);
michael@0 329
michael@0 330 nsDependentAtomString nameStr(attrName);
michael@0 331 bool isJS = false;
michael@0 332
michael@0 333 if (kNameSpaceID_XHTML == contentNamespaceID) {
michael@0 334 //
michael@0 335 // Filter out special case of <br type="_moz"> or <br _moz*>,
michael@0 336 // used by the editor. Bug 16988. Yuck.
michael@0 337 //
michael@0 338 if (namespaceID == kNameSpaceID_None && aTagName == nsGkAtoms::br && attrName == nsGkAtoms::type
michael@0 339 && StringBeginsWith(valueStr, _mozStr)) {
michael@0 340 continue;
michael@0 341 }
michael@0 342
michael@0 343 if (mIsCopying && mIsFirstChildOfOL && (aTagName == nsGkAtoms::li)
michael@0 344 && (attrName == nsGkAtoms::value)) {
michael@0 345 // This is handled separately in SerializeLIValueAttribute()
michael@0 346 continue;
michael@0 347 }
michael@0 348
michael@0 349 isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
michael@0 350
michael@0 351 if (namespaceID == kNameSpaceID_None &&
michael@0 352 ((attrName == nsGkAtoms::href) ||
michael@0 353 (attrName == nsGkAtoms::src))) {
michael@0 354 // Make all links absolute when converting only the selection:
michael@0 355 if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
michael@0 356 // Would be nice to handle OBJECT and APPLET tags,
michael@0 357 // but that gets more complicated since we have to
michael@0 358 // search the tag list for CODEBASE as well.
michael@0 359 // For now, just leave them relative.
michael@0 360 nsCOMPtr<nsIURI> uri = aContent->GetBaseURI();
michael@0 361 if (uri) {
michael@0 362 nsAutoString absURI;
michael@0 363 rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
michael@0 364 if (NS_SUCCEEDED(rv)) {
michael@0 365 valueStr = absURI;
michael@0 366 }
michael@0 367 }
michael@0 368 }
michael@0 369 // Need to escape URI.
michael@0 370 nsAutoString tempURI(valueStr);
michael@0 371 if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
michael@0 372 valueStr = tempURI;
michael@0 373 }
michael@0 374
michael@0 375 if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
michael@0 376 attrName == nsGkAtoms::content) {
michael@0 377 // If we're serializing a <meta http-equiv="content-type">,
michael@0 378 // use the proper value, rather than what's in the document.
michael@0 379 nsAutoString header;
michael@0 380 aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
michael@0 381 if (header.LowerCaseEqualsLiteral("content-type")) {
michael@0 382 valueStr = NS_LITERAL_STRING("text/html; charset=") +
michael@0 383 NS_ConvertASCIItoUTF16(mCharset);
michael@0 384 }
michael@0 385 }
michael@0 386
michael@0 387 // Expand shorthand attribute.
michael@0 388 if (namespaceID == kNameSpaceID_None && IsShorthandAttr(attrName, aTagName) && valueStr.IsEmpty()) {
michael@0 389 valueStr = nameStr;
michael@0 390 }
michael@0 391 }
michael@0 392 else {
michael@0 393 isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
michael@0 394 }
michael@0 395
michael@0 396 SerializeAttr(prefixStr, nameStr, valueStr, aStr, !isJS);
michael@0 397
michael@0 398 if (addNSAttr) {
michael@0 399 NS_ASSERTION(!prefixStr.IsEmpty(),
michael@0 400 "Namespaced attributes must have a prefix");
michael@0 401 SerializeAttr(xmlnsStr, prefixStr, uriStr, aStr, true);
michael@0 402 PushNameSpaceDecl(prefixStr, uriStr, aOriginalElement);
michael@0 403 }
michael@0 404 }
michael@0 405 }
michael@0 406
michael@0 407
michael@0 408 void
michael@0 409 nsXHTMLContentSerializer::AppendEndOfElementStart(nsIContent *aOriginalElement,
michael@0 410 nsIAtom * aName,
michael@0 411 int32_t aNamespaceID,
michael@0 412 nsAString& aStr)
michael@0 413 {
michael@0 414 // this method is not called by nsHTMLContentSerializer
michael@0 415 // so we don't have to check HTML element, just XHTML
michael@0 416 NS_ASSERTION(!mIsHTMLSerializer, "nsHTMLContentSerializer shouldn't call this method !");
michael@0 417
michael@0 418 if (kNameSpaceID_XHTML != aNamespaceID) {
michael@0 419 nsXMLContentSerializer::AppendEndOfElementStart(aOriginalElement, aName,
michael@0 420 aNamespaceID, aStr);
michael@0 421 return;
michael@0 422 }
michael@0 423
michael@0 424 nsIContent* content = aOriginalElement;
michael@0 425
michael@0 426 // for non empty elements, even if they are not a container, we always
michael@0 427 // serialize their content, because the XHTML element could contain non XHTML
michael@0 428 // nodes useful in some context, like in an XSLT stylesheet
michael@0 429 if (HasNoChildren(content)) {
michael@0 430
michael@0 431 nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0 432
michael@0 433 if (parserService) {
michael@0 434 bool isContainer;
michael@0 435 parserService->
michael@0 436 IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(aName),
michael@0 437 isContainer);
michael@0 438 if (!isContainer) {
michael@0 439 // for backward compatibility with HTML 4 user agents
michael@0 440 // only non-container HTML elements can be closed immediatly,
michael@0 441 // and a space is added before />
michael@0 442 AppendToString(NS_LITERAL_STRING(" />"), aStr);
michael@0 443 return;
michael@0 444 }
michael@0 445 }
michael@0 446 }
michael@0 447 AppendToString(kGreaterThan, aStr);
michael@0 448 }
michael@0 449
michael@0 450 void
michael@0 451 nsXHTMLContentSerializer::AfterElementStart(nsIContent * aContent,
michael@0 452 nsIContent *aOriginalElement,
michael@0 453 nsAString& aStr)
michael@0 454 {
michael@0 455 nsIAtom *name = aContent->Tag();
michael@0 456 if (aContent->GetNameSpaceID() == kNameSpaceID_XHTML &&
michael@0 457 mRewriteEncodingDeclaration &&
michael@0 458 name == nsGkAtoms::head) {
michael@0 459
michael@0 460 // Check if there already are any content-type meta children.
michael@0 461 // If there are, they will be modified to use the correct charset.
michael@0 462 // If there aren't, we'll insert one here.
michael@0 463 bool hasMeta = false;
michael@0 464 for (nsIContent* child = aContent->GetFirstChild();
michael@0 465 child;
michael@0 466 child = child->GetNextSibling()) {
michael@0 467 if (child->IsHTML(nsGkAtoms::meta) &&
michael@0 468 child->HasAttr(kNameSpaceID_None, nsGkAtoms::content)) {
michael@0 469 nsAutoString header;
michael@0 470 child->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
michael@0 471
michael@0 472 if (header.LowerCaseEqualsLiteral("content-type")) {
michael@0 473 hasMeta = true;
michael@0 474 break;
michael@0 475 }
michael@0 476 }
michael@0 477 }
michael@0 478
michael@0 479 if (!hasMeta) {
michael@0 480 AppendNewLineToString(aStr);
michael@0 481 if (mDoFormat) {
michael@0 482 AppendIndentation(aStr);
michael@0 483 }
michael@0 484 AppendToString(NS_LITERAL_STRING("<meta http-equiv=\"content-type\""),
michael@0 485 aStr);
michael@0 486 AppendToString(NS_LITERAL_STRING(" content=\"text/html; charset="), aStr);
michael@0 487 AppendToString(NS_ConvertASCIItoUTF16(mCharset), aStr);
michael@0 488 if (mIsHTMLSerializer)
michael@0 489 AppendToString(NS_LITERAL_STRING("\">"), aStr);
michael@0 490 else
michael@0 491 AppendToString(NS_LITERAL_STRING("\" />"), aStr);
michael@0 492 }
michael@0 493 }
michael@0 494 }
michael@0 495
michael@0 496 void
michael@0 497 nsXHTMLContentSerializer::AfterElementEnd(nsIContent * aContent,
michael@0 498 nsAString& aStr)
michael@0 499 {
michael@0 500 NS_ASSERTION(!mIsHTMLSerializer, "nsHTMLContentSerializer shouldn't call this method !");
michael@0 501
michael@0 502 int32_t namespaceID = aContent->GetNameSpaceID();
michael@0 503 nsIAtom *name = aContent->Tag();
michael@0 504
michael@0 505 // this method is not called by nsHTMLContentSerializer
michael@0 506 // so we don't have to check HTML element, just XHTML
michael@0 507 if (kNameSpaceID_XHTML == namespaceID && name == nsGkAtoms::body) {
michael@0 508 --mInBody;
michael@0 509 }
michael@0 510 }
michael@0 511
michael@0 512
michael@0 513 NS_IMETHODIMP
michael@0 514 nsXHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument,
michael@0 515 nsAString& aStr)
michael@0 516 {
michael@0 517 if (!mBodyOnly)
michael@0 518 return nsXMLContentSerializer::AppendDocumentStart(aDocument, aStr);
michael@0 519
michael@0 520 return NS_OK;
michael@0 521 }
michael@0 522
michael@0 523 bool
michael@0 524 nsXHTMLContentSerializer::CheckElementStart(nsIContent * aContent,
michael@0 525 bool & aForceFormat,
michael@0 526 nsAString& aStr)
michael@0 527 {
michael@0 528 // The _moz_dirty attribute is emitted by the editor to
michael@0 529 // indicate that this element should be pretty printed
michael@0 530 // even if we're not in pretty printing mode
michael@0 531 aForceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
michael@0 532 aContent->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
michael@0 533
michael@0 534 nsIAtom *name = aContent->Tag();
michael@0 535 int32_t namespaceID = aContent->GetNameSpaceID();
michael@0 536
michael@0 537 if (namespaceID == kNameSpaceID_XHTML) {
michael@0 538 if (name == nsGkAtoms::br && mPreLevel > 0 &&
michael@0 539 (mFlags & nsIDocumentEncoder::OutputNoFormattingInPre)) {
michael@0 540 AppendNewLineToString(aStr);
michael@0 541 return false;
michael@0 542 }
michael@0 543
michael@0 544 if (name == nsGkAtoms::body) {
michael@0 545 ++mInBody;
michael@0 546 }
michael@0 547 }
michael@0 548 return true;
michael@0 549 }
michael@0 550
michael@0 551 bool
michael@0 552 nsXHTMLContentSerializer::CheckElementEnd(nsIContent * aContent,
michael@0 553 bool & aForceFormat,
michael@0 554 nsAString& aStr)
michael@0 555 {
michael@0 556 NS_ASSERTION(!mIsHTMLSerializer, "nsHTMLContentSerializer shouldn't call this method !");
michael@0 557
michael@0 558 aForceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
michael@0 559 aContent->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
michael@0 560
michael@0 561 nsIAtom *name = aContent->Tag();
michael@0 562 int32_t namespaceID = aContent->GetNameSpaceID();
michael@0 563
michael@0 564 // this method is not called by nsHTMLContentSerializer
michael@0 565 // so we don't have to check HTML element, just XHTML
michael@0 566 if (namespaceID == kNameSpaceID_XHTML) {
michael@0 567 if (mIsCopying && name == nsGkAtoms::ol) {
michael@0 568 NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
michael@0 569 /* Though at this point we must always have an state to be deleted as all
michael@0 570 the OL opening tags are supposed to push an olState object to the stack*/
michael@0 571 if (!mOLStateStack.IsEmpty()) {
michael@0 572 mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1);
michael@0 573 }
michael@0 574 }
michael@0 575
michael@0 576 if (HasNoChildren(aContent)) {
michael@0 577 nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0 578
michael@0 579 if (parserService) {
michael@0 580 bool isContainer;
michael@0 581
michael@0 582 parserService->
michael@0 583 IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name),
michael@0 584 isContainer);
michael@0 585 if (!isContainer) {
michael@0 586 // non-container HTML elements are already closed,
michael@0 587 // see AppendEndOfElementStart
michael@0 588 return false;
michael@0 589 }
michael@0 590 }
michael@0 591 }
michael@0 592 // for backward compatibility with old HTML user agents,
michael@0 593 // empty elements should have an ending tag, so we mustn't call
michael@0 594 // nsXMLContentSerializer::CheckElementEnd
michael@0 595 return true;
michael@0 596 }
michael@0 597
michael@0 598 bool dummyFormat;
michael@0 599 return nsXMLContentSerializer::CheckElementEnd(aContent, dummyFormat, aStr);
michael@0 600 }
michael@0 601
michael@0 602 void
michael@0 603 nsXHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
michael@0 604 nsAString& aOutputStr)
michael@0 605 {
michael@0 606 if (mBodyOnly && !mInBody) {
michael@0 607 return;
michael@0 608 }
michael@0 609
michael@0 610 if (mDisableEntityEncoding) {
michael@0 611 aOutputStr.Append(aStr);
michael@0 612 return;
michael@0 613 }
michael@0 614
michael@0 615 nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr);
michael@0 616 }
michael@0 617
michael@0 618 bool
michael@0 619 nsXHTMLContentSerializer::IsShorthandAttr(const nsIAtom* aAttrName,
michael@0 620 const nsIAtom* aElementName)
michael@0 621 {
michael@0 622 // checked
michael@0 623 if ((aAttrName == nsGkAtoms::checked) &&
michael@0 624 (aElementName == nsGkAtoms::input)) {
michael@0 625 return true;
michael@0 626 }
michael@0 627
michael@0 628 // compact
michael@0 629 if ((aAttrName == nsGkAtoms::compact) &&
michael@0 630 (aElementName == nsGkAtoms::dir ||
michael@0 631 aElementName == nsGkAtoms::dl ||
michael@0 632 aElementName == nsGkAtoms::menu ||
michael@0 633 aElementName == nsGkAtoms::ol ||
michael@0 634 aElementName == nsGkAtoms::ul)) {
michael@0 635 return true;
michael@0 636 }
michael@0 637
michael@0 638 // declare
michael@0 639 if ((aAttrName == nsGkAtoms::declare) &&
michael@0 640 (aElementName == nsGkAtoms::object)) {
michael@0 641 return true;
michael@0 642 }
michael@0 643
michael@0 644 // defer
michael@0 645 if ((aAttrName == nsGkAtoms::defer) &&
michael@0 646 (aElementName == nsGkAtoms::script)) {
michael@0 647 return true;
michael@0 648 }
michael@0 649
michael@0 650 // disabled
michael@0 651 if ((aAttrName == nsGkAtoms::disabled) &&
michael@0 652 (aElementName == nsGkAtoms::button ||
michael@0 653 aElementName == nsGkAtoms::input ||
michael@0 654 aElementName == nsGkAtoms::optgroup ||
michael@0 655 aElementName == nsGkAtoms::option ||
michael@0 656 aElementName == nsGkAtoms::select ||
michael@0 657 aElementName == nsGkAtoms::textarea)) {
michael@0 658 return true;
michael@0 659 }
michael@0 660
michael@0 661 // ismap
michael@0 662 if ((aAttrName == nsGkAtoms::ismap) &&
michael@0 663 (aElementName == nsGkAtoms::img ||
michael@0 664 aElementName == nsGkAtoms::input)) {
michael@0 665 return true;
michael@0 666 }
michael@0 667
michael@0 668 // multiple
michael@0 669 if ((aAttrName == nsGkAtoms::multiple) &&
michael@0 670 (aElementName == nsGkAtoms::select)) {
michael@0 671 return true;
michael@0 672 }
michael@0 673
michael@0 674 // noresize
michael@0 675 if ((aAttrName == nsGkAtoms::noresize) &&
michael@0 676 (aElementName == nsGkAtoms::frame)) {
michael@0 677 return true;
michael@0 678 }
michael@0 679
michael@0 680 // noshade
michael@0 681 if ((aAttrName == nsGkAtoms::noshade) &&
michael@0 682 (aElementName == nsGkAtoms::hr)) {
michael@0 683 return true;
michael@0 684 }
michael@0 685
michael@0 686 // nowrap
michael@0 687 if ((aAttrName == nsGkAtoms::nowrap) &&
michael@0 688 (aElementName == nsGkAtoms::td ||
michael@0 689 aElementName == nsGkAtoms::th)) {
michael@0 690 return true;
michael@0 691 }
michael@0 692
michael@0 693 // readonly
michael@0 694 if ((aAttrName == nsGkAtoms::readonly) &&
michael@0 695 (aElementName == nsGkAtoms::input ||
michael@0 696 aElementName == nsGkAtoms::textarea)) {
michael@0 697 return true;
michael@0 698 }
michael@0 699
michael@0 700 // selected
michael@0 701 if ((aAttrName == nsGkAtoms::selected) &&
michael@0 702 (aElementName == nsGkAtoms::option)) {
michael@0 703 return true;
michael@0 704 }
michael@0 705
michael@0 706 // autoplay and controls
michael@0 707 if ((aElementName == nsGkAtoms::video || aElementName == nsGkAtoms::audio) &&
michael@0 708 (aAttrName == nsGkAtoms::autoplay || aAttrName == nsGkAtoms::muted ||
michael@0 709 aAttrName == nsGkAtoms::controls)) {
michael@0 710 return true;
michael@0 711 }
michael@0 712
michael@0 713 return false;
michael@0 714 }
michael@0 715
michael@0 716 bool
michael@0 717 nsXHTMLContentSerializer::LineBreakBeforeOpen(int32_t aNamespaceID, nsIAtom* aName)
michael@0 718 {
michael@0 719
michael@0 720 if (aNamespaceID != kNameSpaceID_XHTML) {
michael@0 721 return mAddSpace;
michael@0 722 }
michael@0 723
michael@0 724 if (aName == nsGkAtoms::title ||
michael@0 725 aName == nsGkAtoms::meta ||
michael@0 726 aName == nsGkAtoms::link ||
michael@0 727 aName == nsGkAtoms::style ||
michael@0 728 aName == nsGkAtoms::select ||
michael@0 729 aName == nsGkAtoms::option ||
michael@0 730 aName == nsGkAtoms::script ||
michael@0 731 aName == nsGkAtoms::html) {
michael@0 732 return true;
michael@0 733 }
michael@0 734 else {
michael@0 735 nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0 736
michael@0 737 if (parserService) {
michael@0 738 bool res;
michael@0 739 parserService->
michael@0 740 IsBlock(parserService->HTMLCaseSensitiveAtomTagToId(aName), res);
michael@0 741 return res;
michael@0 742 }
michael@0 743 }
michael@0 744
michael@0 745 return mAddSpace;
michael@0 746 }
michael@0 747
michael@0 748 bool
michael@0 749 nsXHTMLContentSerializer::LineBreakAfterOpen(int32_t aNamespaceID, nsIAtom* aName)
michael@0 750 {
michael@0 751
michael@0 752 if (aNamespaceID != kNameSpaceID_XHTML) {
michael@0 753 return false;
michael@0 754 }
michael@0 755
michael@0 756 if ((aName == nsGkAtoms::html) ||
michael@0 757 (aName == nsGkAtoms::head) ||
michael@0 758 (aName == nsGkAtoms::body) ||
michael@0 759 (aName == nsGkAtoms::ul) ||
michael@0 760 (aName == nsGkAtoms::ol) ||
michael@0 761 (aName == nsGkAtoms::dl) ||
michael@0 762 (aName == nsGkAtoms::table) ||
michael@0 763 (aName == nsGkAtoms::tbody) ||
michael@0 764 (aName == nsGkAtoms::tr) ||
michael@0 765 (aName == nsGkAtoms::br) ||
michael@0 766 (aName == nsGkAtoms::meta) ||
michael@0 767 (aName == nsGkAtoms::link) ||
michael@0 768 (aName == nsGkAtoms::script) ||
michael@0 769 (aName == nsGkAtoms::select) ||
michael@0 770 (aName == nsGkAtoms::map) ||
michael@0 771 (aName == nsGkAtoms::area) ||
michael@0 772 (aName == nsGkAtoms::style)) {
michael@0 773 return true;
michael@0 774 }
michael@0 775
michael@0 776 return false;
michael@0 777 }
michael@0 778
michael@0 779 bool
michael@0 780 nsXHTMLContentSerializer::LineBreakBeforeClose(int32_t aNamespaceID, nsIAtom* aName)
michael@0 781 {
michael@0 782
michael@0 783 if (aNamespaceID != kNameSpaceID_XHTML) {
michael@0 784 return false;
michael@0 785 }
michael@0 786
michael@0 787 if ((aName == nsGkAtoms::html) ||
michael@0 788 (aName == nsGkAtoms::head) ||
michael@0 789 (aName == nsGkAtoms::body) ||
michael@0 790 (aName == nsGkAtoms::ul) ||
michael@0 791 (aName == nsGkAtoms::ol) ||
michael@0 792 (aName == nsGkAtoms::dl) ||
michael@0 793 (aName == nsGkAtoms::select) ||
michael@0 794 (aName == nsGkAtoms::table) ||
michael@0 795 (aName == nsGkAtoms::tbody)) {
michael@0 796 return true;
michael@0 797 }
michael@0 798 return false;
michael@0 799 }
michael@0 800
michael@0 801 bool
michael@0 802 nsXHTMLContentSerializer::LineBreakAfterClose(int32_t aNamespaceID, nsIAtom* aName)
michael@0 803 {
michael@0 804
michael@0 805 if (aNamespaceID != kNameSpaceID_XHTML) {
michael@0 806 return false;
michael@0 807 }
michael@0 808
michael@0 809 if ((aName == nsGkAtoms::html) ||
michael@0 810 (aName == nsGkAtoms::head) ||
michael@0 811 (aName == nsGkAtoms::body) ||
michael@0 812 (aName == nsGkAtoms::tr) ||
michael@0 813 (aName == nsGkAtoms::th) ||
michael@0 814 (aName == nsGkAtoms::td) ||
michael@0 815 (aName == nsGkAtoms::pre) ||
michael@0 816 (aName == nsGkAtoms::title) ||
michael@0 817 (aName == nsGkAtoms::li) ||
michael@0 818 (aName == nsGkAtoms::dt) ||
michael@0 819 (aName == nsGkAtoms::dd) ||
michael@0 820 (aName == nsGkAtoms::blockquote) ||
michael@0 821 (aName == nsGkAtoms::select) ||
michael@0 822 (aName == nsGkAtoms::option) ||
michael@0 823 (aName == nsGkAtoms::p) ||
michael@0 824 (aName == nsGkAtoms::map) ||
michael@0 825 (aName == nsGkAtoms::div)) {
michael@0 826 return true;
michael@0 827 }
michael@0 828 else {
michael@0 829 nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0 830
michael@0 831 if (parserService) {
michael@0 832 bool res;
michael@0 833 parserService->
michael@0 834 IsBlock(parserService->HTMLCaseSensitiveAtomTagToId(aName), res);
michael@0 835 return res;
michael@0 836 }
michael@0 837 }
michael@0 838
michael@0 839 return false;
michael@0 840 }
michael@0 841
michael@0 842
michael@0 843 void
michael@0 844 nsXHTMLContentSerializer::MaybeEnterInPreContent(nsIContent* aNode)
michael@0 845 {
michael@0 846
michael@0 847 if (aNode->GetNameSpaceID() != kNameSpaceID_XHTML) {
michael@0 848 return;
michael@0 849 }
michael@0 850
michael@0 851 nsIAtom *name = aNode->Tag();
michael@0 852
michael@0 853 if (name == nsGkAtoms::pre ||
michael@0 854 name == nsGkAtoms::script ||
michael@0 855 name == nsGkAtoms::style ||
michael@0 856 name == nsGkAtoms::noscript ||
michael@0 857 name == nsGkAtoms::noframes
michael@0 858 ) {
michael@0 859 mPreLevel++;
michael@0 860 }
michael@0 861 }
michael@0 862
michael@0 863 void
michael@0 864 nsXHTMLContentSerializer::MaybeLeaveFromPreContent(nsIContent* aNode)
michael@0 865 {
michael@0 866 if (aNode->GetNameSpaceID() != kNameSpaceID_XHTML) {
michael@0 867 return;
michael@0 868 }
michael@0 869
michael@0 870 nsIAtom *name = aNode->Tag();
michael@0 871 if (name == nsGkAtoms::pre ||
michael@0 872 name == nsGkAtoms::script ||
michael@0 873 name == nsGkAtoms::style ||
michael@0 874 name == nsGkAtoms::noscript ||
michael@0 875 name == nsGkAtoms::noframes
michael@0 876 ) {
michael@0 877 --mPreLevel;
michael@0 878 }
michael@0 879 }
michael@0 880
michael@0 881 void
michael@0 882 nsXHTMLContentSerializer::SerializeLIValueAttribute(nsIContent* aElement,
michael@0 883 nsAString& aStr)
michael@0 884 {
michael@0 885 // We are copying and we are at the "first" LI node of OL in selected range.
michael@0 886 // It may not be the first LI child of OL but it's first in the selected range.
michael@0 887 // Note that we get into this condition only once per a OL.
michael@0 888 bool found = false;
michael@0 889 nsCOMPtr<nsIDOMNode> currNode = do_QueryInterface(aElement);
michael@0 890 nsAutoString valueStr;
michael@0 891
michael@0 892 olState state (0, false);
michael@0 893
michael@0 894 if (!mOLStateStack.IsEmpty()) {
michael@0 895 state = mOLStateStack[mOLStateStack.Length()-1];
michael@0 896 // isFirstListItem should be true only before the serialization of the
michael@0 897 // first item in the list.
michael@0 898 state.isFirstListItem = false;
michael@0 899 mOLStateStack[mOLStateStack.Length()-1] = state;
michael@0 900 }
michael@0 901
michael@0 902 int32_t startVal = state.startVal;
michael@0 903 int32_t offset = 0;
michael@0 904
michael@0 905 // Traverse previous siblings until we find one with "value" attribute.
michael@0 906 // offset keeps track of how many previous siblings we had tocurrNode traverse.
michael@0 907 while (currNode && !found) {
michael@0 908 nsCOMPtr<nsIDOMElement> currElement = do_QueryInterface(currNode);
michael@0 909 // currElement may be null if it were a text node.
michael@0 910 if (currElement) {
michael@0 911 nsAutoString tagName;
michael@0 912 currElement->GetTagName(tagName);
michael@0 913 if (tagName.LowerCaseEqualsLiteral("li")) {
michael@0 914 currElement->GetAttribute(NS_LITERAL_STRING("value"), valueStr);
michael@0 915 if (valueStr.IsEmpty())
michael@0 916 offset++;
michael@0 917 else {
michael@0 918 found = true;
michael@0 919 nsresult rv = NS_OK;
michael@0 920 startVal = valueStr.ToInteger(&rv);
michael@0 921 }
michael@0 922 }
michael@0 923 }
michael@0 924 nsCOMPtr<nsIDOMNode> tmp;
michael@0 925 currNode->GetPreviousSibling(getter_AddRefs(tmp));
michael@0 926 currNode.swap(tmp);
michael@0 927 }
michael@0 928 // If LI was not having "value", Set the "value" attribute for it.
michael@0 929 // Note that We are at the first LI in the selected range of OL.
michael@0 930 if (offset == 0 && found) {
michael@0 931 // offset = 0 => LI itself has the value attribute and we did not need to traverse back.
michael@0 932 // Just serialize value attribute like other tags.
michael@0 933 SerializeAttr(EmptyString(), NS_LITERAL_STRING("value"), valueStr, aStr, false);
michael@0 934 }
michael@0 935 else if (offset == 1 && !found) {
michael@0 936 /*(offset = 1 && !found) means either LI is the first child node of OL
michael@0 937 and LI is not having "value" attribute.
michael@0 938 In that case we would not like to set "value" attribute to reduce the changes.
michael@0 939 */
michael@0 940 //do nothing...
michael@0 941 }
michael@0 942 else if (offset > 0) {
michael@0 943 // Set value attribute.
michael@0 944 nsAutoString valueStr;
michael@0 945
michael@0 946 //As serializer needs to use this valueAttr we are creating here,
michael@0 947 valueStr.AppendInt(startVal + offset);
michael@0 948 SerializeAttr(EmptyString(), NS_LITERAL_STRING("value"), valueStr, aStr, false);
michael@0 949 }
michael@0 950 }
michael@0 951
michael@0 952 bool
michael@0 953 nsXHTMLContentSerializer::IsFirstChildOfOL(nsIContent* aElement)
michael@0 954 {
michael@0 955 nsCOMPtr<nsIDOMNode> node = do_QueryInterface(aElement);
michael@0 956 nsAutoString parentName;
michael@0 957
michael@0 958 nsCOMPtr<nsIDOMNode> parentNode;
michael@0 959 node->GetParentNode(getter_AddRefs(parentNode));
michael@0 960 if (parentNode)
michael@0 961 parentNode->GetNodeName(parentName);
michael@0 962 else
michael@0 963 return false;
michael@0 964
michael@0 965 if (parentName.LowerCaseEqualsLiteral("ol")) {
michael@0 966
michael@0 967 if (!mOLStateStack.IsEmpty()) {
michael@0 968 olState state = mOLStateStack[mOLStateStack.Length()-1];
michael@0 969 if (state.isFirstListItem)
michael@0 970 return true;
michael@0 971 }
michael@0 972
michael@0 973 return false;
michael@0 974 }
michael@0 975 else
michael@0 976 return false;
michael@0 977 }
michael@0 978
michael@0 979 bool
michael@0 980 nsXHTMLContentSerializer::HasNoChildren(nsIContent * aContent) {
michael@0 981
michael@0 982 for (nsIContent* child = aContent->GetFirstChild();
michael@0 983 child;
michael@0 984 child = child->GetNextSibling()) {
michael@0 985
michael@0 986 if (!child->IsNodeOfType(nsINode::eTEXT))
michael@0 987 return false;
michael@0 988
michael@0 989 if (child->TextLength())
michael@0 990 return false;
michael@0 991 }
michael@0 992
michael@0 993 return true;
michael@0 994 }

mercurial