|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set ts=2 sw=2 et tw=80: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 /* |
|
8 * nsIContentSerializer implementation that can be used with an |
|
9 * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML |
|
10 * string that could be parsed into more or less the original DOM. |
|
11 */ |
|
12 |
|
13 #include "nsHTMLContentSerializer.h" |
|
14 |
|
15 #include "nsIDOMElement.h" |
|
16 #include "nsIContent.h" |
|
17 #include "nsIDocument.h" |
|
18 #include "nsNameSpaceManager.h" |
|
19 #include "nsString.h" |
|
20 #include "nsUnicharUtils.h" |
|
21 #include "nsXPIDLString.h" |
|
22 #include "nsIServiceManager.h" |
|
23 #include "nsIDocumentEncoder.h" |
|
24 #include "nsGkAtoms.h" |
|
25 #include "nsIURI.h" |
|
26 #include "nsNetUtil.h" |
|
27 #include "nsEscape.h" |
|
28 #include "nsITextToSubURI.h" |
|
29 #include "nsCRT.h" |
|
30 #include "nsIParserService.h" |
|
31 #include "nsContentUtils.h" |
|
32 #include "nsLWBrkCIID.h" |
|
33 #include "nsIScriptElement.h" |
|
34 #include "nsAttrName.h" |
|
35 #include "nsIDocShell.h" |
|
36 #include "nsIEditor.h" |
|
37 #include "nsIHTMLEditor.h" |
|
38 #include "mozilla/dom/Element.h" |
|
39 #include "nsParserConstants.h" |
|
40 |
|
41 using namespace mozilla::dom; |
|
42 |
|
43 nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer) |
|
44 { |
|
45 nsHTMLContentSerializer* it = new nsHTMLContentSerializer(); |
|
46 if (!it) { |
|
47 return NS_ERROR_OUT_OF_MEMORY; |
|
48 } |
|
49 |
|
50 return CallQueryInterface(it, aSerializer); |
|
51 } |
|
52 |
|
53 nsHTMLContentSerializer::nsHTMLContentSerializer() |
|
54 { |
|
55 mIsHTMLSerializer = true; |
|
56 } |
|
57 |
|
58 nsHTMLContentSerializer::~nsHTMLContentSerializer() |
|
59 { |
|
60 } |
|
61 |
|
62 |
|
63 NS_IMETHODIMP |
|
64 nsHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument, |
|
65 nsAString& aStr) |
|
66 { |
|
67 return NS_OK; |
|
68 } |
|
69 |
|
70 void |
|
71 nsHTMLContentSerializer::SerializeHTMLAttributes(nsIContent* aContent, |
|
72 nsIContent *aOriginalElement, |
|
73 nsAString& aTagPrefix, |
|
74 const nsAString& aTagNamespaceURI, |
|
75 nsIAtom* aTagName, |
|
76 int32_t aNamespace, |
|
77 nsAString& aStr) |
|
78 { |
|
79 int32_t count = aContent->GetAttrCount(); |
|
80 if (!count) |
|
81 return; |
|
82 |
|
83 nsresult rv; |
|
84 nsAutoString valueStr; |
|
85 NS_NAMED_LITERAL_STRING(_mozStr, "_moz"); |
|
86 |
|
87 for (int32_t index = count; index > 0;) { |
|
88 --index; |
|
89 const nsAttrName* name = aContent->GetAttrNameAt(index); |
|
90 int32_t namespaceID = name->NamespaceID(); |
|
91 nsIAtom* attrName = name->LocalName(); |
|
92 |
|
93 // Filter out any attribute starting with [-|_]moz |
|
94 nsDependentAtomString attrNameStr(attrName); |
|
95 if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) || |
|
96 StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) { |
|
97 continue; |
|
98 } |
|
99 aContent->GetAttr(namespaceID, attrName, valueStr); |
|
100 |
|
101 // |
|
102 // Filter out special case of <br type="_moz"> or <br _moz*>, |
|
103 // used by the editor. Bug 16988. Yuck. |
|
104 // |
|
105 if (aTagName == nsGkAtoms::br && aNamespace == kNameSpaceID_XHTML && |
|
106 attrName == nsGkAtoms::type && namespaceID == kNameSpaceID_None && |
|
107 StringBeginsWith(valueStr, _mozStr)) { |
|
108 continue; |
|
109 } |
|
110 |
|
111 if (mIsCopying && mIsFirstChildOfOL && |
|
112 aTagName == nsGkAtoms::li && aNamespace == kNameSpaceID_XHTML && |
|
113 attrName == nsGkAtoms::value && namespaceID == kNameSpaceID_None){ |
|
114 // This is handled separately in SerializeLIValueAttribute() |
|
115 continue; |
|
116 } |
|
117 bool isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr); |
|
118 |
|
119 if (((attrName == nsGkAtoms::href && |
|
120 (namespaceID == kNameSpaceID_None || |
|
121 namespaceID == kNameSpaceID_XLink)) || |
|
122 (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) { |
|
123 // Make all links absolute when converting only the selection: |
|
124 if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) { |
|
125 // Would be nice to handle OBJECT and APPLET tags, |
|
126 // but that gets more complicated since we have to |
|
127 // search the tag list for CODEBASE as well. |
|
128 // For now, just leave them relative. |
|
129 nsCOMPtr<nsIURI> uri = aContent->GetBaseURI(); |
|
130 if (uri) { |
|
131 nsAutoString absURI; |
|
132 rv = NS_MakeAbsoluteURI(absURI, valueStr, uri); |
|
133 if (NS_SUCCEEDED(rv)) { |
|
134 valueStr = absURI; |
|
135 } |
|
136 } |
|
137 } |
|
138 // Need to escape URI. |
|
139 nsAutoString tempURI(valueStr); |
|
140 if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr))) |
|
141 valueStr = tempURI; |
|
142 } |
|
143 |
|
144 if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta && |
|
145 aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content |
|
146 && namespaceID == kNameSpaceID_None) { |
|
147 // If we're serializing a <meta http-equiv="content-type">, |
|
148 // use the proper value, rather than what's in the document. |
|
149 nsAutoString header; |
|
150 aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header); |
|
151 if (header.LowerCaseEqualsLiteral("content-type")) { |
|
152 valueStr = NS_LITERAL_STRING("text/html; charset=") + |
|
153 NS_ConvertASCIItoUTF16(mCharset); |
|
154 } |
|
155 } |
|
156 |
|
157 nsDependentAtomString nameStr(attrName); |
|
158 nsAutoString prefix; |
|
159 if (namespaceID == kNameSpaceID_XML) { |
|
160 prefix.Assign(NS_LITERAL_STRING("xml")); |
|
161 } else if (namespaceID == kNameSpaceID_XLink) { |
|
162 prefix.Assign(NS_LITERAL_STRING("xlink")); |
|
163 } |
|
164 |
|
165 // Expand shorthand attribute. |
|
166 if (aNamespace == kNameSpaceID_XHTML && |
|
167 namespaceID == kNameSpaceID_None && |
|
168 IsShorthandAttr(attrName, aTagName) && |
|
169 valueStr.IsEmpty()) { |
|
170 valueStr = nameStr; |
|
171 } |
|
172 SerializeAttr(prefix, nameStr, valueStr, aStr, !isJS); |
|
173 } |
|
174 } |
|
175 |
|
176 NS_IMETHODIMP |
|
177 nsHTMLContentSerializer::AppendElementStart(Element* aElement, |
|
178 Element* aOriginalElement, |
|
179 nsAString& aStr) |
|
180 { |
|
181 NS_ENSURE_ARG(aElement); |
|
182 |
|
183 nsIContent* content = aElement; |
|
184 |
|
185 bool forceFormat = false; |
|
186 if (!CheckElementStart(content, forceFormat, aStr)) { |
|
187 return NS_OK; |
|
188 } |
|
189 |
|
190 nsIAtom *name = content->Tag(); |
|
191 int32_t ns = content->GetNameSpaceID(); |
|
192 |
|
193 bool lineBreakBeforeOpen = LineBreakBeforeOpen(ns, name); |
|
194 |
|
195 if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) { |
|
196 if (mColPos && lineBreakBeforeOpen) { |
|
197 AppendNewLineToString(aStr); |
|
198 } |
|
199 else { |
|
200 MaybeAddNewlineForRootNode(aStr); |
|
201 } |
|
202 if (!mColPos) { |
|
203 AppendIndentation(aStr); |
|
204 } |
|
205 else if (mAddSpace) { |
|
206 AppendToString(char16_t(' '), aStr); |
|
207 mAddSpace = false; |
|
208 } |
|
209 } |
|
210 else if (mAddSpace) { |
|
211 AppendToString(char16_t(' '), aStr); |
|
212 mAddSpace = false; |
|
213 } |
|
214 else { |
|
215 MaybeAddNewlineForRootNode(aStr); |
|
216 } |
|
217 // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode wasn't |
|
218 // called |
|
219 mAddNewlineForRootNode = false; |
|
220 |
|
221 AppendToString(kLessThan, aStr); |
|
222 |
|
223 AppendToString(nsDependentAtomString(name), aStr); |
|
224 |
|
225 MaybeEnterInPreContent(content); |
|
226 |
|
227 // for block elements, we increase the indentation |
|
228 if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) |
|
229 IncrIndentation(name); |
|
230 |
|
231 // Need to keep track of OL and LI elements in order to get ordinal number |
|
232 // for the LI. |
|
233 if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML){ |
|
234 // We are copying and current node is an OL; |
|
235 // Store its start attribute value in olState->startVal. |
|
236 nsAutoString start; |
|
237 int32_t startAttrVal = 0; |
|
238 |
|
239 aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start); |
|
240 if (!start.IsEmpty()){ |
|
241 nsresult rv = NS_OK; |
|
242 startAttrVal = start.ToInteger(&rv); |
|
243 //If OL has "start" attribute, first LI element has to start with that value |
|
244 //Therefore subtracting 1 as all the LI elements are incrementing it before using it; |
|
245 //In failure of ToInteger(), default StartAttrValue to 0. |
|
246 if (NS_SUCCEEDED(rv)) |
|
247 startAttrVal--; |
|
248 else |
|
249 startAttrVal = 0; |
|
250 } |
|
251 mOLStateStack.AppendElement(olState(startAttrVal, true)); |
|
252 } |
|
253 |
|
254 if (mIsCopying && name == nsGkAtoms::li && ns == kNameSpaceID_XHTML) { |
|
255 mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement); |
|
256 if (mIsFirstChildOfOL){ |
|
257 // If OL is parent of this LI, serialize attributes in different manner. |
|
258 SerializeLIValueAttribute(aElement, aStr); |
|
259 } |
|
260 } |
|
261 |
|
262 // Even LI passed above have to go through this |
|
263 // for serializing attributes other than "value". |
|
264 nsAutoString dummyPrefix; |
|
265 SerializeHTMLAttributes(content, |
|
266 aOriginalElement, |
|
267 dummyPrefix, |
|
268 EmptyString(), |
|
269 name, |
|
270 ns, |
|
271 aStr); |
|
272 |
|
273 AppendToString(kGreaterThan, aStr); |
|
274 |
|
275 if (ns == kNameSpaceID_XHTML && |
|
276 (name == nsGkAtoms::script || |
|
277 name == nsGkAtoms::style || |
|
278 name == nsGkAtoms::noscript || |
|
279 name == nsGkAtoms::noframes)) { |
|
280 ++mDisableEntityEncoding; |
|
281 } |
|
282 |
|
283 if ((mDoFormat || forceFormat) && !mPreLevel && |
|
284 !mDoRaw && LineBreakAfterOpen(ns, name)) { |
|
285 AppendNewLineToString(aStr); |
|
286 } |
|
287 |
|
288 AfterElementStart(content, aOriginalElement, aStr); |
|
289 |
|
290 return NS_OK; |
|
291 } |
|
292 |
|
293 NS_IMETHODIMP |
|
294 nsHTMLContentSerializer::AppendElementEnd(Element* aElement, |
|
295 nsAString& aStr) |
|
296 { |
|
297 NS_ENSURE_ARG(aElement); |
|
298 |
|
299 nsIContent* content = aElement; |
|
300 |
|
301 nsIAtom *name = content->Tag(); |
|
302 int32_t ns = content->GetNameSpaceID(); |
|
303 |
|
304 if (ns == kNameSpaceID_XHTML && |
|
305 (name == nsGkAtoms::script || |
|
306 name == nsGkAtoms::style || |
|
307 name == nsGkAtoms::noscript || |
|
308 name == nsGkAtoms::noframes)) { |
|
309 --mDisableEntityEncoding; |
|
310 } |
|
311 |
|
312 bool forceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) && |
|
313 content->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty); |
|
314 |
|
315 if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) { |
|
316 DecrIndentation(name); |
|
317 } |
|
318 |
|
319 if (name == nsGkAtoms::script) { |
|
320 nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement); |
|
321 |
|
322 if (script && script->IsMalformed()) { |
|
323 // We're looking at a malformed script tag. This means that the end tag |
|
324 // was missing in the source. Imitate that here by not serializing the end |
|
325 // tag. |
|
326 --mPreLevel; |
|
327 return NS_OK; |
|
328 } |
|
329 } |
|
330 else if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) { |
|
331 NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack"); |
|
332 /* Though at this point we must always have an state to be deleted as all |
|
333 the OL opening tags are supposed to push an olState object to the stack*/ |
|
334 if (!mOLStateStack.IsEmpty()) { |
|
335 mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1); |
|
336 } |
|
337 } |
|
338 |
|
339 if (ns == kNameSpaceID_XHTML) { |
|
340 nsIParserService* parserService = nsContentUtils::GetParserService(); |
|
341 |
|
342 if (parserService) { |
|
343 bool isContainer; |
|
344 |
|
345 parserService-> |
|
346 IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name), |
|
347 isContainer); |
|
348 if (!isContainer) { |
|
349 return NS_OK; |
|
350 } |
|
351 } |
|
352 } |
|
353 |
|
354 if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) { |
|
355 |
|
356 bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name); |
|
357 |
|
358 if (mColPos && lineBreakBeforeClose) { |
|
359 AppendNewLineToString(aStr); |
|
360 } |
|
361 if (!mColPos) { |
|
362 AppendIndentation(aStr); |
|
363 } |
|
364 else if (mAddSpace) { |
|
365 AppendToString(char16_t(' '), aStr); |
|
366 mAddSpace = false; |
|
367 } |
|
368 } |
|
369 else if (mAddSpace) { |
|
370 AppendToString(char16_t(' '), aStr); |
|
371 mAddSpace = false; |
|
372 } |
|
373 |
|
374 AppendToString(kEndTag, aStr); |
|
375 AppendToString(nsDependentAtomString(name), aStr); |
|
376 AppendToString(kGreaterThan, aStr); |
|
377 |
|
378 MaybeLeaveFromPreContent(content); |
|
379 |
|
380 if ((mDoFormat || forceFormat) && !mPreLevel |
|
381 && !mDoRaw && LineBreakAfterClose(ns, name)) { |
|
382 AppendNewLineToString(aStr); |
|
383 } |
|
384 else { |
|
385 MaybeFlagNewlineForRootNode(aElement); |
|
386 } |
|
387 |
|
388 if (name == nsGkAtoms::body && ns == kNameSpaceID_XHTML) { |
|
389 --mInBody; |
|
390 } |
|
391 |
|
392 return NS_OK; |
|
393 } |
|
394 |
|
// Highest character code covered by the entity tables below (U+00A0,
// NO-BREAK SPACE). Both tables are indexed directly by character code, so
// each must hold exactly kValNBSP + 1 entries.
static const uint16_t kValNBSP = 160;

// Replacement text for characters appearing in element content. A null entry
// means the character is written as-is. Each non-null entry is a complete
// entity reference (including '&' and ';') made of ASCII characters only,
// since it is appended to the output with AppendASCII().
static const char* kEntities[] = {
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //   0 -   9
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  10 -  19
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  20 -  29
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "&amp;", nullptr, //  30 -  39 ('&' = 38)
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  40 -  49
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  50 -  59
  "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,   //  60 -  69 ('<' = 60, '>' = 62)
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  70 -  79
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  80 -  89
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  90 -  99
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 100 - 109
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 110 - 119
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 120 - 129
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 130 - 139
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 140 - 149
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 150 - 159
  "&nbsp;"                                                                                  // 160 (NBSP)
};

// Replacement text for characters appearing in attribute values. Identical
// to kEntities except that the double quote (34) is replaced as well.
static const char* kAttrEntities[] = {
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //   0 -   9
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  10 -  19
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  20 -  29
  nullptr, nullptr, nullptr, nullptr, "&quot;", nullptr, nullptr, nullptr, "&amp;", nullptr, //  30 -  39 ('"' = 34, '&' = 38)
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  40 -  49
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  50 -  59
  "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,   //  60 -  69 ('<' = 60, '>' = 62)
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  70 -  79
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  80 -  89
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, //  90 -  99
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 100 - 109
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 110 - 119
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 120 - 129
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 130 - 139
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 140 - 149
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 150 - 159
  "&nbsp;"                                                                                  // 160 (NBSP)
};
|
435 |
|
436 uint32_t FindNextBasicEntity(const nsAString& aStr, |
|
437 const uint32_t aLen, |
|
438 uint32_t aIndex, |
|
439 const char** aEntityTable, |
|
440 const char** aEntity) |
|
441 { |
|
442 for (; aIndex < aLen; ++aIndex) { |
|
443 // for each character in this chunk, check if it |
|
444 // needs to be replaced |
|
445 char16_t val = aStr[aIndex]; |
|
446 if (val <= kValNBSP && aEntityTable[val]) { |
|
447 *aEntity = aEntityTable[val]; |
|
448 return aIndex; |
|
449 } |
|
450 } |
|
451 return aIndex; |
|
452 } |
|
453 |
|
454 void |
|
455 nsHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr, |
|
456 nsAString& aOutputStr) |
|
457 { |
|
458 if (mBodyOnly && !mInBody) { |
|
459 return; |
|
460 } |
|
461 |
|
462 if (mDisableEntityEncoding) { |
|
463 aOutputStr.Append(aStr); |
|
464 return; |
|
465 } |
|
466 |
|
467 bool nonBasicEntities = |
|
468 !!(mFlags & (nsIDocumentEncoder::OutputEncodeLatin1Entities | |
|
469 nsIDocumentEncoder::OutputEncodeHTMLEntities | |
|
470 nsIDocumentEncoder::OutputEncodeW3CEntities)); |
|
471 |
|
472 if (!nonBasicEntities && |
|
473 (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities))) { |
|
474 const char **entityTable = mInAttribute ? kAttrEntities : kEntities; |
|
475 uint32_t start = 0; |
|
476 const uint32_t len = aStr.Length(); |
|
477 for (uint32_t i = 0; i < len; ++i) { |
|
478 const char* entity = nullptr; |
|
479 i = FindNextBasicEntity(aStr, len, i, entityTable, &entity); |
|
480 uint32_t normalTextLen = i - start; |
|
481 if (normalTextLen) { |
|
482 aOutputStr.Append(Substring(aStr, start, normalTextLen)); |
|
483 } |
|
484 if (entity) { |
|
485 aOutputStr.AppendASCII(entity); |
|
486 start = i + 1; |
|
487 } |
|
488 } |
|
489 return; |
|
490 } else if (nonBasicEntities) { |
|
491 nsIParserService* parserService = nsContentUtils::GetParserService(); |
|
492 |
|
493 if (!parserService) { |
|
494 NS_ERROR("Can't get parser service"); |
|
495 return; |
|
496 } |
|
497 |
|
498 nsReadingIterator<char16_t> done_reading; |
|
499 aStr.EndReading(done_reading); |
|
500 |
|
501 // for each chunk of |aString|... |
|
502 uint32_t advanceLength = 0; |
|
503 nsReadingIterator<char16_t> iter; |
|
504 |
|
505 const char **entityTable = mInAttribute ? kAttrEntities : kEntities; |
|
506 nsAutoCString entityReplacement; |
|
507 |
|
508 for (aStr.BeginReading(iter); |
|
509 iter != done_reading; |
|
510 iter.advance(int32_t(advanceLength))) { |
|
511 uint32_t fragmentLength = iter.size_forward(); |
|
512 uint32_t lengthReplaced = 0; // the number of UTF-16 codepoints |
|
513 // replaced by a particular entity |
|
514 const char16_t* c = iter.get(); |
|
515 const char16_t* fragmentStart = c; |
|
516 const char16_t* fragmentEnd = c + fragmentLength; |
|
517 const char* entityText = nullptr; |
|
518 const char* fullConstEntityText = nullptr; |
|
519 char* fullEntityText = nullptr; |
|
520 |
|
521 advanceLength = 0; |
|
522 // for each character in this chunk, check if it |
|
523 // needs to be replaced |
|
524 for (; c < fragmentEnd; c++, advanceLength++) { |
|
525 char16_t val = *c; |
|
526 if (val <= kValNBSP && entityTable[val]) { |
|
527 fullConstEntityText = entityTable[val]; |
|
528 break; |
|
529 } else if (val > 127 && |
|
530 ((val < 256 && |
|
531 mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) || |
|
532 mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) { |
|
533 entityReplacement.Truncate(); |
|
534 parserService->HTMLConvertUnicodeToEntity(val, entityReplacement); |
|
535 |
|
536 if (!entityReplacement.IsEmpty()) { |
|
537 entityText = entityReplacement.get(); |
|
538 break; |
|
539 } |
|
540 } |
|
541 else if (val > 127 && |
|
542 mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities && |
|
543 mEntityConverter) { |
|
544 if (NS_IS_HIGH_SURROGATE(val) && |
|
545 c + 1 < fragmentEnd && |
|
546 NS_IS_LOW_SURROGATE(*(c + 1))) { |
|
547 uint32_t valUTF32 = SURROGATE_TO_UCS4(val, *(++c)); |
|
548 if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32, |
|
549 nsIEntityConverter::entityW3C, &fullEntityText))) { |
|
550 lengthReplaced = 2; |
|
551 break; |
|
552 } |
|
553 else { |
|
554 advanceLength++; |
|
555 } |
|
556 } |
|
557 else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val, |
|
558 nsIEntityConverter::entityW3C, |
|
559 &fullEntityText))) { |
|
560 lengthReplaced = 1; |
|
561 break; |
|
562 } |
|
563 } |
|
564 } |
|
565 |
|
566 aOutputStr.Append(fragmentStart, advanceLength); |
|
567 if (entityText) { |
|
568 aOutputStr.Append(char16_t('&')); |
|
569 AppendASCIItoUTF16(entityText, aOutputStr); |
|
570 aOutputStr.Append(char16_t(';')); |
|
571 advanceLength++; |
|
572 } |
|
573 else if (fullConstEntityText) { |
|
574 aOutputStr.AppendASCII(fullConstEntityText); |
|
575 ++advanceLength; |
|
576 } |
|
577 // if it comes from nsIEntityConverter, it already has '&' and ';' |
|
578 else if (fullEntityText) { |
|
579 AppendASCIItoUTF16(fullEntityText, aOutputStr); |
|
580 nsMemory::Free(fullEntityText); |
|
581 advanceLength += lengthReplaced; |
|
582 } |
|
583 } |
|
584 } else { |
|
585 nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr); |
|
586 } |
|
587 } |