|
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "mozTXTToHTMLConv.h" |
|
7 #include "nsNetUtil.h" |
|
8 #include "nsUnicharUtils.h" |
|
9 #include "nsCRT.h" |
|
10 #include "nsIExternalProtocolHandler.h" |
|
11 #include "nsIIOService.h" |
|
12 |
|
13 #include <algorithm> |
|
14 |
|
15 #ifdef DEBUG_BenB_Perf |
|
16 #include "prtime.h" |
|
17 #include "prinrval.h" |
|
18 #endif |
|
19 |
|
// Multiplicative growth factor of 1.2.  Its users lie outside the part of
// the file visible here -- NOTE(review): presumably used to pre-size output
// strings; confirm against the call sites.
const double growthRate = 1.2;
|
21 |
|
22 // Bug 183111, editor now replaces multiple spaces with leading |
|
23 // 0xA0's and a single ending space, so need to treat 0xA0's as spaces. |
|
24 // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)" |
|
25 // Also recognize the Japanese ideographic space 0x3000 as a space. |
|
26 static inline bool IsSpace(const char16_t aChar) |
|
27 { |
|
28 return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000); |
|
29 } |
|
30 |
|
31 // Escape Char will take ch, escape it and append the result to |
|
32 // aStringToAppendTo |
|
33 void |
|
34 mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo, |
|
35 bool inAttribute) |
|
36 { |
|
37 switch (ch) |
|
38 { |
|
39 case '<': |
|
40 aStringToAppendTo.AppendLiteral("<"); |
|
41 break; |
|
42 case '>': |
|
43 aStringToAppendTo.AppendLiteral(">"); |
|
44 break; |
|
45 case '&': |
|
46 aStringToAppendTo.AppendLiteral("&"); |
|
47 break; |
|
48 case '"': |
|
49 if (inAttribute) |
|
50 { |
|
51 aStringToAppendTo.AppendLiteral("""); |
|
52 break; |
|
53 } |
|
54 // else fall through |
|
55 default: |
|
56 aStringToAppendTo += ch; |
|
57 } |
|
58 |
|
59 return; |
|
60 } |
|
61 |
|
62 // EscapeStr takes the passed in string and |
|
63 // escapes it IN PLACE. |
|
64 void |
|
65 mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute) |
|
66 { |
|
67 // the replace substring routines |
|
68 // don't seem to work if you have a character |
|
69 // in the in string that is also in the replacement |
|
70 // string! =( |
|
71 //aInString.ReplaceSubstring("&", "&"); |
|
72 //aInString.ReplaceSubstring("<", "<"); |
|
73 //aInString.ReplaceSubstring(">", ">"); |
|
74 for (uint32_t i = 0; i < aInString.Length();) |
|
75 { |
|
76 switch (aInString[i]) |
|
77 { |
|
78 case '<': |
|
79 aInString.Cut(i, 1); |
|
80 aInString.Insert(NS_LITERAL_STRING("<"), i); |
|
81 i += 4; // skip past the integers we just added |
|
82 break; |
|
83 case '>': |
|
84 aInString.Cut(i, 1); |
|
85 aInString.Insert(NS_LITERAL_STRING(">"), i); |
|
86 i += 4; // skip past the integers we just added |
|
87 break; |
|
88 case '&': |
|
89 aInString.Cut(i, 1); |
|
90 aInString.Insert(NS_LITERAL_STRING("&"), i); |
|
91 i += 5; // skip past the integers we just added |
|
92 break; |
|
93 case '"': |
|
94 if (inAttribute) |
|
95 { |
|
96 aInString.Cut(i, 1); |
|
97 aInString.Insert(NS_LITERAL_STRING("""), i); |
|
98 i += 6; |
|
99 break; |
|
100 } |
|
101 // else fall through |
|
102 default: |
|
103 i++; |
|
104 } |
|
105 } |
|
106 } |
|
107 |
|
108 void |
|
109 mozTXTToHTMLConv::UnescapeStr(const char16_t * aInString, int32_t aStartPos, int32_t aLength, nsString& aOutString) |
|
110 { |
|
111 const char16_t * subString = nullptr; |
|
112 for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;) |
|
113 { |
|
114 int32_t remainingChars = i - aStartPos; |
|
115 if (aInString[i] == '&') |
|
116 { |
|
117 subString = &aInString[i]; |
|
118 if (!nsCRT::strncmp(subString, MOZ_UTF16("<"), std::min(4, aLength - remainingChars))) |
|
119 { |
|
120 aOutString.Append(char16_t('<')); |
|
121 i += 4; |
|
122 } |
|
123 else if (!nsCRT::strncmp(subString, MOZ_UTF16(">"), std::min(4, aLength - remainingChars))) |
|
124 { |
|
125 aOutString.Append(char16_t('>')); |
|
126 i += 4; |
|
127 } |
|
128 else if (!nsCRT::strncmp(subString, MOZ_UTF16("&"), std::min(5, aLength - remainingChars))) |
|
129 { |
|
130 aOutString.Append(char16_t('&')); |
|
131 i += 5; |
|
132 } |
|
133 else if (!nsCRT::strncmp(subString, MOZ_UTF16("""), std::min(6, aLength - remainingChars))) |
|
134 { |
|
135 aOutString.Append(char16_t('"')); |
|
136 i += 6; |
|
137 } |
|
138 else |
|
139 { |
|
140 aOutString += aInString[i]; |
|
141 i++; |
|
142 } |
|
143 } |
|
144 else |
|
145 { |
|
146 aOutString += aInString[i]; |
|
147 i++; |
|
148 } |
|
149 } |
|
150 } |
|
151 |
|
152 void |
|
153 mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, |
|
154 const uint32_t pos, nsString& aOutString) |
|
155 { |
|
156 NS_ASSERTION(int32_t(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851"); |
|
157 if (int32_t(pos) >= aInLength) |
|
158 return; |
|
159 |
|
160 if (aInString[pos] == '@') |
|
161 { |
|
162 // only pre-pend a mailto url if the string contains a .domain in it.. |
|
163 //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm" |
|
164 nsDependentString inString(aInString, aInLength); |
|
165 if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign.... |
|
166 { |
|
167 aOutString.AssignLiteral("mailto:"); |
|
168 aOutString += aInString; |
|
169 } |
|
170 } |
|
171 else if (aInString[pos] == '.') |
|
172 { |
|
173 if (ItMatchesDelimited(aInString, aInLength, |
|
174 MOZ_UTF16("www."), 4, LT_IGNORE, LT_IGNORE)) |
|
175 { |
|
176 aOutString.AssignLiteral("http://"); |
|
177 aOutString += aInString; |
|
178 } |
|
179 else if (ItMatchesDelimited(aInString,aInLength, MOZ_UTF16("ftp."), 4, LT_IGNORE, LT_IGNORE)) |
|
180 { |
|
181 aOutString.AssignLiteral("ftp://"); |
|
182 aOutString += aInString; |
|
183 } |
|
184 } |
|
185 } |
|
186 |
|
187 bool |
|
188 mozTXTToHTMLConv::FindURLStart(const char16_t * aInString, int32_t aInLength, |
|
189 const uint32_t pos, const modetype check, |
|
190 uint32_t& start) |
|
191 { |
|
192 switch(check) |
|
193 { // no breaks, because end of blocks is never reached |
|
194 case RFC1738: |
|
195 { |
|
196 if (!nsCRT::strncmp(&aInString[std::max(int32_t(pos - 4), 0)], MOZ_UTF16("<URL:"), 5)) |
|
197 { |
|
198 start = pos + 1; |
|
199 return true; |
|
200 } |
|
201 else |
|
202 return false; |
|
203 } |
|
204 case RFC2396E: |
|
205 { |
|
206 nsString temp(aInString, aInLength); |
|
207 int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(MOZ_UTF16("<>\""), pos - 1); |
|
208 if (i != kNotFound && (temp[uint32_t(i)] == '<' || |
|
209 temp[uint32_t(i)] == '"')) |
|
210 { |
|
211 start = uint32_t(++i); |
|
212 return start < pos; |
|
213 } |
|
214 else |
|
215 return false; |
|
216 } |
|
217 case freetext: |
|
218 { |
|
219 int32_t i = pos - 1; |
|
220 for (; i >= 0 && ( |
|
221 nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) || |
|
222 nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) || |
|
223 aInString[uint32_t(i)] == '+' || |
|
224 aInString[uint32_t(i)] == '-' || |
|
225 aInString[uint32_t(i)] == '.' |
|
226 ); i--) |
|
227 ; |
|
228 if (++i >= 0 && uint32_t(i) < pos && nsCRT::IsAsciiAlpha(aInString[uint32_t(i)])) |
|
229 { |
|
230 start = uint32_t(i); |
|
231 return true; |
|
232 } |
|
233 else |
|
234 return false; |
|
235 } |
|
236 case abbreviated: |
|
237 { |
|
238 int32_t i = pos - 1; |
|
239 // This disallows non-ascii-characters for email. |
|
240 // Currently correct, but revisit later after standards changed. |
|
241 bool isEmail = aInString[pos] == (char16_t)'@'; |
|
242 // These chars mark the start of the URL |
|
243 for (; i >= 0 |
|
244 && aInString[uint32_t(i)] != '>' && aInString[uint32_t(i)] != '<' |
|
245 && aInString[uint32_t(i)] != '"' && aInString[uint32_t(i)] != '\'' |
|
246 && aInString[uint32_t(i)] != '`' && aInString[uint32_t(i)] != ',' |
|
247 && aInString[uint32_t(i)] != '{' && aInString[uint32_t(i)] != '[' |
|
248 && aInString[uint32_t(i)] != '(' && aInString[uint32_t(i)] != '|' |
|
249 && aInString[uint32_t(i)] != '\\' |
|
250 && !IsSpace(aInString[uint32_t(i)]) |
|
251 && (!isEmail || nsCRT::IsAscii(aInString[uint32_t(i)])) |
|
252 ; i--) |
|
253 ; |
|
254 if |
|
255 ( |
|
256 ++i >= 0 && uint32_t(i) < pos |
|
257 && |
|
258 ( |
|
259 nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) || |
|
260 nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) |
|
261 ) |
|
262 ) |
|
263 { |
|
264 start = uint32_t(i); |
|
265 return true; |
|
266 } |
|
267 else |
|
268 return false; |
|
269 } |
|
270 default: |
|
271 return false; |
|
272 } //switch |
|
273 } |
|
274 |
|
275 bool |
|
276 mozTXTToHTMLConv::FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos, |
|
277 const modetype check, const uint32_t start, uint32_t& end) |
|
278 { |
|
279 switch(check) |
|
280 { // no breaks, because end of blocks is never reached |
|
281 case RFC1738: |
|
282 case RFC2396E: |
|
283 { |
|
284 nsString temp(aInString, aInStringLength); |
|
285 |
|
286 int32_t i = temp.FindCharInSet(MOZ_UTF16("<>\""), pos + 1); |
|
287 if (i != kNotFound && temp[uint32_t(i--)] == |
|
288 (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"')) |
|
289 { |
|
290 end = uint32_t(i); |
|
291 return end > pos; |
|
292 } |
|
293 return false; |
|
294 } |
|
295 case freetext: |
|
296 case abbreviated: |
|
297 { |
|
298 uint32_t i = pos + 1; |
|
299 bool isEmail = aInString[pos] == (char16_t)'@'; |
|
300 bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL |
|
301 bool seenOpeningSquareBracket = false; // there is a '[' earlier in the URL |
|
302 for (; int32_t(i) < aInStringLength; i++) |
|
303 { |
|
304 // These chars mark the end of the URL |
|
305 if (aInString[i] == '>' || aInString[i] == '<' || |
|
306 aInString[i] == '"' || aInString[i] == '`' || |
|
307 aInString[i] == '}' || aInString[i] == '{' || |
|
308 aInString[i] == '|' || |
|
309 (aInString[i] == ')' && !seenOpeningParenthesis) || |
|
310 (aInString[i] == ']' && !seenOpeningSquareBracket) || |
|
311 // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo. |
|
312 (aInString[i] == '[' && i > 2 && |
|
313 (aInString[i - 1] != '/' || aInString[i - 2] != '/')) || |
|
314 IsSpace(aInString[i])) |
|
315 break; |
|
316 // Disallow non-ascii-characters for email. |
|
317 // Currently correct, but revisit later after standards changed. |
|
318 if (isEmail && ( |
|
319 aInString[i] == '(' || aInString[i] == '\'' || |
|
320 !nsCRT::IsAscii(aInString[i]))) |
|
321 break; |
|
322 if (aInString[i] == '(') |
|
323 seenOpeningParenthesis = true; |
|
324 if (aInString[i] == '[') |
|
325 seenOpeningSquareBracket = true; |
|
326 } |
|
327 // These chars are allowed in the middle of the URL, but not at end. |
|
328 // Technically they are, but are used in normal text after the URL. |
|
329 while (--i > pos && ( |
|
330 aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' || |
|
331 aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' || |
|
332 aInString[i] == ':' || aInString[i] == '\'' |
|
333 )) |
|
334 ; |
|
335 if (i > pos) |
|
336 { |
|
337 end = i; |
|
338 return true; |
|
339 } |
|
340 return false; |
|
341 } |
|
342 default: |
|
343 return false; |
|
344 } //switch |
|
345 } |
|
346 |
|
347 void |
|
348 mozTXTToHTMLConv::CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, |
|
349 const uint32_t pos, const uint32_t whathasbeendone, |
|
350 const modetype check, const uint32_t start, const uint32_t end, |
|
351 nsString& txtURL, nsString& desc, |
|
352 int32_t& replaceBefore, int32_t& replaceAfter) |
|
353 { |
|
354 uint32_t descstart = start; |
|
355 switch(check) |
|
356 { |
|
357 case RFC1738: |
|
358 { |
|
359 descstart = start - 5; |
|
360 desc.Append(&aInString[descstart], end - descstart + 2); // include "<URL:" and ">" |
|
361 replaceAfter = end - pos + 1; |
|
362 } break; |
|
363 case RFC2396E: |
|
364 { |
|
365 descstart = start - 1; |
|
366 desc.Append(&aInString[descstart], end - descstart + 2); // include brackets |
|
367 replaceAfter = end - pos + 1; |
|
368 } break; |
|
369 case freetext: |
|
370 case abbreviated: |
|
371 { |
|
372 descstart = start; |
|
373 desc.Append(&aInString[descstart], end - start + 1); // don't include brackets |
|
374 replaceAfter = end - pos; |
|
375 } break; |
|
376 default: break; |
|
377 } //switch |
|
378 |
|
379 EscapeStr(desc, false); |
|
380 |
|
381 txtURL.Append(&aInString[start], end - start + 1); |
|
382 txtURL.StripWhitespace(); |
|
383 |
|
384 // FIX ME |
|
385 nsAutoString temp2; |
|
386 ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2); |
|
387 replaceBefore = temp2.Length(); |
|
388 return; |
|
389 } |
|
390 |
|
391 bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL) |
|
392 { |
|
393 if (!mIOService) |
|
394 return false; |
|
395 |
|
396 nsAutoCString scheme; |
|
397 nsresult rv = mIOService->ExtractScheme(aURL, scheme); |
|
398 if(NS_FAILED(rv)) |
|
399 return false; |
|
400 |
|
401 // Get the handler for this scheme. |
|
402 nsCOMPtr<nsIProtocolHandler> handler; |
|
403 rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler)); |
|
404 if(NS_FAILED(rv)) |
|
405 return false; |
|
406 |
|
407 // Is it an external protocol handler? If not, linkify it. |
|
408 nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler); |
|
409 if (!externalHandler) |
|
410 return true; // handler is built-in, linkify it! |
|
411 |
|
412 // If external app exists for the scheme then linkify it. |
|
413 bool exists; |
|
414 rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists); |
|
415 return(NS_SUCCEEDED(rv) && exists); |
|
416 } |
|
417 |
|
418 bool |
|
419 mozTXTToHTMLConv::CheckURLAndCreateHTML( |
|
420 const nsString& txtURL, const nsString& desc, const modetype mode, |
|
421 nsString& outputHTML) |
|
422 { |
|
423 // Create *uri from txtURL |
|
424 nsCOMPtr<nsIURI> uri; |
|
425 nsresult rv; |
|
426 // Lazily initialize mIOService |
|
427 if (!mIOService) |
|
428 { |
|
429 mIOService = do_GetIOService(); |
|
430 |
|
431 if (!mIOService) |
|
432 return false; |
|
433 } |
|
434 |
|
435 // See if the url should be linkified. |
|
436 NS_ConvertUTF16toUTF8 utf8URL(txtURL); |
|
437 if (!ShouldLinkify(utf8URL)) |
|
438 return false; |
|
439 |
|
440 // it would be faster if we could just check to see if there is a protocol |
|
441 // handler for the url and return instead of actually trying to create a url... |
|
442 rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri)); |
|
443 |
|
444 // Real work |
|
445 if (NS_SUCCEEDED(rv) && uri) |
|
446 { |
|
447 outputHTML.AssignLiteral("<a class=\"moz-txt-link-"); |
|
448 switch(mode) |
|
449 { |
|
450 case RFC1738: |
|
451 outputHTML.AppendLiteral("rfc1738"); |
|
452 break; |
|
453 case RFC2396E: |
|
454 outputHTML.AppendLiteral("rfc2396E"); |
|
455 break; |
|
456 case freetext: |
|
457 outputHTML.AppendLiteral("freetext"); |
|
458 break; |
|
459 case abbreviated: |
|
460 outputHTML.AppendLiteral("abbreviated"); |
|
461 break; |
|
462 default: break; |
|
463 } |
|
464 nsAutoString escapedURL(txtURL); |
|
465 EscapeStr(escapedURL, true); |
|
466 |
|
467 outputHTML.AppendLiteral("\" href=\""); |
|
468 outputHTML += escapedURL; |
|
469 outputHTML.AppendLiteral("\">"); |
|
470 outputHTML += desc; |
|
471 outputHTML.AppendLiteral("</a>"); |
|
472 return true; |
|
473 } |
|
474 else |
|
475 return false; |
|
476 } |
|
477 |
|
478 NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t * aInString, int32_t aInLength, int32_t aPos, int32_t * aStartPos, int32_t * aEndPos) |
|
479 { |
|
480 // call FindURL on the passed in string |
|
481 nsAutoString outputHTML; // we'll ignore the generated output HTML |
|
482 |
|
483 *aStartPos = -1; |
|
484 *aEndPos = -1; |
|
485 |
|
486 FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos); |
|
487 |
|
488 return NS_OK; |
|
489 } |
|
490 |
|
491 bool |
|
492 mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos, |
|
493 const uint32_t whathasbeendone, |
|
494 nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter) |
|
495 { |
|
496 enum statetype {unchecked, invalid, startok, endok, success}; |
|
497 static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated}; |
|
498 |
|
499 statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode |
|
500 /* I don't like this abuse of enums as index for the array, |
|
501 but I don't know a better method */ |
|
502 |
|
503 // Define, which modes to check |
|
504 /* all modes but abbreviated are checked for text[pos] == ':', |
|
505 only abbreviated for '.', RFC2396E and abbreviated for '@' */ |
|
506 for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode; |
|
507 iState = modetype(iState + 1)) |
|
508 state[iState] = aInString[pos] == ':' ? unchecked : invalid; |
|
509 switch (aInString[pos]) |
|
510 { |
|
511 case '@': |
|
512 state[RFC2396E] = unchecked; |
|
513 // no break here |
|
514 case '.': |
|
515 state[abbreviated] = unchecked; |
|
516 break; |
|
517 case ':': |
|
518 state[abbreviated] = invalid; |
|
519 break; |
|
520 default: |
|
521 break; |
|
522 } |
|
523 |
|
524 // Test, first successful mode wins, sequence defined by |ranking| |
|
525 int32_t iCheck = 0; // the currently tested modetype |
|
526 modetype check = ranking[iCheck]; |
|
527 for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success; |
|
528 iCheck++) |
|
529 /* check state from last run. |
|
530 If this is the first, check this one, which isn't = success yet */ |
|
531 { |
|
532 check = ranking[iCheck]; |
|
533 |
|
534 uint32_t start, end; |
|
535 |
|
536 if (state[check] == unchecked) |
|
537 if (FindURLStart(aInString, aInLength, pos, check, start)) |
|
538 state[check] = startok; |
|
539 |
|
540 if (state[check] == startok) |
|
541 if (FindURLEnd(aInString, aInLength, pos, check, start, end)) |
|
542 state[check] = endok; |
|
543 |
|
544 if (state[check] == endok) |
|
545 { |
|
546 nsAutoString txtURL, desc; |
|
547 int32_t resultReplaceBefore, resultReplaceAfter; |
|
548 |
|
549 CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end, |
|
550 txtURL, desc, |
|
551 resultReplaceBefore, resultReplaceAfter); |
|
552 |
|
553 if (aInString[pos] != ':') |
|
554 { |
|
555 nsAutoString temp = txtURL; |
|
556 txtURL.SetLength(0); |
|
557 CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL); |
|
558 } |
|
559 |
|
560 if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check, |
|
561 outputHTML)) |
|
562 { |
|
563 replaceBefore = resultReplaceBefore; |
|
564 replaceAfter = resultReplaceAfter; |
|
565 state[check] = success; |
|
566 } |
|
567 } // if |
|
568 } // for |
|
569 return state[check] == success; |
|
570 } |
|
571 |
|
572 bool |
|
573 mozTXTToHTMLConv::ItMatchesDelimited(const char16_t * aInString, |
|
574 int32_t aInLength, const char16_t* rep, int32_t aRepLen, |
|
575 LIMTYPE before, LIMTYPE after) |
|
576 { |
|
577 |
|
578 // this little method gets called a LOT. I found we were spending a |
|
579 // lot of time just calculating the length of the variable "rep" |
|
580 // over and over again every time we called it. So we're now passing |
|
581 // an integer in here. |
|
582 int32_t textLen = aInLength; |
|
583 |
|
584 if |
|
585 ( |
|
586 ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER)) |
|
587 && textLen < aRepLen) || |
|
588 ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER)) |
|
589 && textLen < aRepLen + 1) || |
|
590 (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER |
|
591 && textLen < aRepLen + 2) |
|
592 ) |
|
593 return false; |
|
594 |
|
595 char16_t text0 = aInString[0]; |
|
596 char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)]; |
|
597 |
|
598 if |
|
599 ( |
|
600 (before == LT_ALPHA |
|
601 && !nsCRT::IsAsciiAlpha(text0)) || |
|
602 (before == LT_DIGIT |
|
603 && !nsCRT::IsAsciiDigit(text0)) || |
|
604 (before == LT_DELIMITER |
|
605 && |
|
606 ( |
|
607 nsCRT::IsAsciiAlpha(text0) || |
|
608 nsCRT::IsAsciiDigit(text0) || |
|
609 text0 == *rep |
|
610 )) || |
|
611 (after == LT_ALPHA |
|
612 && !nsCRT::IsAsciiAlpha(textAfterPos)) || |
|
613 (after == LT_DIGIT |
|
614 && !nsCRT::IsAsciiDigit(textAfterPos)) || |
|
615 (after == LT_DELIMITER |
|
616 && |
|
617 ( |
|
618 nsCRT::IsAsciiAlpha(textAfterPos) || |
|
619 nsCRT::IsAsciiDigit(textAfterPos) || |
|
620 textAfterPos == *rep |
|
621 )) || |
|
622 !Substring(Substring(aInString, aInString+aInLength), |
|
623 (before == LT_IGNORE ? 0 : 1), |
|
624 aRepLen).Equals(Substring(rep, rep+aRepLen), |
|
625 nsCaseInsensitiveStringComparator()) |
|
626 ) |
|
627 return false; |
|
628 |
|
629 return true; |
|
630 } |
|
631 |
|
632 uint32_t |
|
633 mozTXTToHTMLConv::NumberOfMatches(const char16_t * aInString, int32_t aInStringLength, |
|
634 const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after) |
|
635 { |
|
636 uint32_t result = 0; |
|
637 |
|
638 for (int32_t i = 0; i < aInStringLength; i++) |
|
639 { |
|
640 const char16_t * indexIntoString = &aInString[i]; |
|
641 if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after)) |
|
642 result++; |
|
643 } |
|
644 return result; |
|
645 } |
|
646 |
|
647 |
|
648 // NOTE: the converted html for the phrase is appended to aOutString |
|
649 // tagHTML and attributeHTML are plain ASCII (literal strings, in fact) |
|
650 bool |
|
651 mozTXTToHTMLConv::StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0, |
|
652 const char16_t* tagTXT, int32_t aTagTXTLen, |
|
653 const char* tagHTML, const char* attributeHTML, |
|
654 nsString& aOutString, uint32_t& openTags) |
|
655 { |
|
656 /* We're searching for the following pattern: |
|
657 LT_DELIMITER - "*" - ALPHA - |
|
658 [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER. |
|
659 <strong> is only inserted, if existence of a pair could be verified |
|
660 We use the first opening/closing tag, if we can choose */ |
|
661 |
|
662 const char16_t * newOffset = aInString; |
|
663 int32_t newLength = aInStringLength; |
|
664 if (!col0) // skip the first element? |
|
665 { |
|
666 newOffset = &aInString[1]; |
|
667 newLength = aInStringLength - 1; |
|
668 } |
|
669 |
|
670 // opening tag |
|
671 if |
|
672 ( |
|
673 ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, |
|
674 (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag |
|
675 && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen, |
|
676 LT_ALPHA, LT_DELIMITER) // remaining closing tags |
|
677 > openTags |
|
678 ) |
|
679 { |
|
680 openTags++; |
|
681 aOutString.AppendLiteral("<"); |
|
682 aOutString.AppendASCII(tagHTML); |
|
683 aOutString.Append(char16_t(' ')); |
|
684 aOutString.AppendASCII(attributeHTML); |
|
685 aOutString.AppendLiteral("><span class=\"moz-txt-tag\">"); |
|
686 aOutString.Append(tagTXT); |
|
687 aOutString.AppendLiteral("</span>"); |
|
688 return true; |
|
689 } |
|
690 |
|
691 // closing tag |
|
692 else if (openTags > 0 |
|
693 && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER)) |
|
694 { |
|
695 openTags--; |
|
696 aOutString.AppendLiteral("<span class=\"moz-txt-tag\">"); |
|
697 aOutString.Append(tagTXT); |
|
698 aOutString.AppendLiteral("</span></"); |
|
699 aOutString.AppendASCII(tagHTML); |
|
700 aOutString.Append(char16_t('>')); |
|
701 return true; |
|
702 } |
|
703 |
|
704 return false; |
|
705 } |
|
706 |
|
707 |
|
708 bool |
|
709 mozTXTToHTMLConv::SmilyHit(const char16_t * aInString, int32_t aLength, bool col0, |
|
710 const char* tagTXT, const char* imageName, |
|
711 nsString& outputHTML, int32_t& glyphTextLen) |
|
712 { |
|
713 if ( !aInString || !tagTXT || !imageName ) |
|
714 return false; |
|
715 |
|
716 int32_t tagLen = strlen(tagTXT); |
|
717 |
|
718 uint32_t delim = (col0 ? 0 : 1) + tagLen; |
|
719 |
|
720 if |
|
721 ( |
|
722 (col0 || IsSpace(aInString[0])) |
|
723 && |
|
724 ( |
|
725 aLength <= int32_t(delim) || |
|
726 IsSpace(aInString[delim]) || |
|
727 (aLength > int32_t(delim + 1) |
|
728 && |
|
729 ( |
|
730 aInString[delim] == '.' || |
|
731 aInString[delim] == ',' || |
|
732 aInString[delim] == ';' || |
|
733 aInString[delim] == '8' || |
|
734 aInString[delim] == '>' || |
|
735 aInString[delim] == '!' || |
|
736 aInString[delim] == '?' |
|
737 ) |
|
738 && IsSpace(aInString[delim + 1])) |
|
739 ) |
|
740 && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen, |
|
741 col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE) |
|
742 // Note: tests at different pos for LT_IGNORE and LT_DELIMITER |
|
743 ) |
|
744 { |
|
745 if (!col0) |
|
746 { |
|
747 outputHTML.Truncate(); |
|
748 outputHTML.Append(char16_t(' ')); |
|
749 } |
|
750 |
|
751 outputHTML.AppendLiteral("<span class=\""); // <span class=" |
|
752 AppendASCIItoUTF16(imageName, outputHTML); // e.g. smiley-frown |
|
753 outputHTML.AppendLiteral("\" title=\""); // " title=" |
|
754 AppendASCIItoUTF16(tagTXT, outputHTML); // smiley tooltip |
|
755 outputHTML.AppendLiteral("\"><span>"); // "><span> |
|
756 AppendASCIItoUTF16(tagTXT, outputHTML); // original text |
|
757 outputHTML.AppendLiteral("</span></span>"); // </span></span> |
|
758 glyphTextLen = (col0 ? 0 : 1) + tagLen; |
|
759 return true; |
|
760 } |
|
761 |
|
762 return false; |
|
763 } |
|
764 |
|
765 // the glyph is appended to aOutputString instead of the original string... |
|
766 bool |
|
767 mozTXTToHTMLConv::GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0, |
|
768 nsString& aOutputString, int32_t& glyphTextLen) |
|
769 { |
|
770 char16_t text0 = aInString[0]; |
|
771 char16_t text1 = aInString[1]; |
|
772 char16_t firstChar = (col0 ? text0 : text1); |
|
773 |
|
774 // temporary variable used to store the glyph html text |
|
775 nsAutoString outputHTML; |
|
776 bool bTestSmilie; |
|
777 bool bArg = false; |
|
778 int i; |
|
779 |
|
780 // refactor some of this mess to avoid code duplication and speed execution a bit |
|
781 // there are two cases that need to be tried one after another. To avoid a lot of |
|
782 // duplicate code, rolling into a loop |
|
783 |
|
784 i = 0; |
|
785 while ( i < 2 ) |
|
786 { |
|
787 bTestSmilie = false; |
|
788 if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O')) |
|
789 { |
|
790 // first test passed |
|
791 |
|
792 bTestSmilie = true; |
|
793 bArg = col0; |
|
794 } |
|
795 if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) ) |
|
796 { |
|
797 // second test passed |
|
798 |
|
799 bTestSmilie = true; |
|
800 bArg = false; |
|
801 } |
|
802 if ( bTestSmilie && ( |
|
803 SmilyHit(aInString, aInLength, bArg, |
|
804 ":-)", |
|
805 "moz-smiley-s1", // smile |
|
806 outputHTML, glyphTextLen) || |
|
807 |
|
808 SmilyHit(aInString, aInLength, bArg, |
|
809 ":)", |
|
810 "moz-smiley-s1", // smile |
|
811 outputHTML, glyphTextLen) || |
|
812 |
|
813 SmilyHit(aInString, aInLength, bArg, |
|
814 ":-D", |
|
815 "moz-smiley-s5", // laughing |
|
816 outputHTML, glyphTextLen) || |
|
817 |
|
818 SmilyHit(aInString, aInLength, bArg, |
|
819 ":-(", |
|
820 "moz-smiley-s2", // frown |
|
821 outputHTML, glyphTextLen) || |
|
822 |
|
823 SmilyHit(aInString, aInLength, bArg, |
|
824 ":(", |
|
825 "moz-smiley-s2", // frown |
|
826 outputHTML, glyphTextLen) || |
|
827 |
|
828 SmilyHit(aInString, aInLength, bArg, |
|
829 ":-[", |
|
830 "moz-smiley-s6", // embarassed |
|
831 outputHTML, glyphTextLen) || |
|
832 |
|
833 SmilyHit(aInString, aInLength, bArg, |
|
834 ";-)", |
|
835 "moz-smiley-s3", // wink |
|
836 outputHTML, glyphTextLen) || |
|
837 |
|
838 SmilyHit(aInString, aInLength, col0, |
|
839 ";)", |
|
840 "moz-smiley-s3", // wink |
|
841 outputHTML, glyphTextLen) || |
|
842 |
|
843 SmilyHit(aInString, aInLength, bArg, |
|
844 ":-\\", |
|
845 "moz-smiley-s7", // undecided |
|
846 outputHTML, glyphTextLen) || |
|
847 |
|
848 SmilyHit(aInString, aInLength, bArg, |
|
849 ":-P", |
|
850 "moz-smiley-s4", // tongue |
|
851 outputHTML, glyphTextLen) || |
|
852 |
|
853 SmilyHit(aInString, aInLength, bArg, |
|
854 ";-P", |
|
855 "moz-smiley-s4", // tongue |
|
856 outputHTML, glyphTextLen) || |
|
857 |
|
858 SmilyHit(aInString, aInLength, bArg, |
|
859 "=-O", |
|
860 "moz-smiley-s8", // surprise |
|
861 outputHTML, glyphTextLen) || |
|
862 |
|
863 SmilyHit(aInString, aInLength, bArg, |
|
864 ":-*", |
|
865 "moz-smiley-s9", // kiss |
|
866 outputHTML, glyphTextLen) || |
|
867 |
|
868 SmilyHit(aInString, aInLength, bArg, |
|
869 ">:o", |
|
870 "moz-smiley-s10", // yell |
|
871 outputHTML, glyphTextLen) || |
|
872 |
|
873 SmilyHit(aInString, aInLength, bArg, |
|
874 ">:-o", |
|
875 "moz-smiley-s10", // yell |
|
876 outputHTML, glyphTextLen) || |
|
877 |
|
878 SmilyHit(aInString, aInLength, bArg, |
|
879 "8-)", |
|
880 "moz-smiley-s11", // cool |
|
881 outputHTML, glyphTextLen) || |
|
882 |
|
883 SmilyHit(aInString, aInLength, bArg, |
|
884 ":-$", |
|
885 "moz-smiley-s12", // money |
|
886 outputHTML, glyphTextLen) || |
|
887 |
|
888 SmilyHit(aInString, aInLength, bArg, |
|
889 ":-!", |
|
890 "moz-smiley-s13", // foot |
|
891 outputHTML, glyphTextLen) || |
|
892 |
|
893 SmilyHit(aInString, aInLength, bArg, |
|
894 "O:-)", |
|
895 "moz-smiley-s14", // innocent |
|
896 outputHTML, glyphTextLen) || |
|
897 |
|
898 SmilyHit(aInString, aInLength, bArg, |
|
899 ":'(", |
|
900 "moz-smiley-s15", // cry |
|
901 outputHTML, glyphTextLen) || |
|
902 |
|
903 SmilyHit(aInString, aInLength, bArg, |
|
904 ":-X", |
|
905 "moz-smiley-s16", // sealed |
|
906 outputHTML, glyphTextLen) |
|
907 ) |
|
908 ) |
|
909 { |
|
910 aOutputString.Append(outputHTML); |
|
911 return true; |
|
912 } |
|
913 i++; |
|
914 } |
|
915 if (text0 == '\f') |
|
916 { |
|
917 aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>"); |
|
918 glyphTextLen = 1; |
|
919 return true; |
|
920 } |
|
921 if (text0 == '+' || text1 == '+') |
|
922 { |
|
923 if (ItMatchesDelimited(aInString, aInLength, |
|
924 MOZ_UTF16(" +/-"), 4, |
|
925 LT_IGNORE, LT_IGNORE)) |
|
926 { |
|
927 aOutputString.AppendLiteral(" ±"); |
|
928 glyphTextLen = 4; |
|
929 return true; |
|
930 } |
|
931 if (col0 && ItMatchesDelimited(aInString, aInLength, |
|
932 MOZ_UTF16("+/-"), 3, |
|
933 LT_IGNORE, LT_IGNORE)) |
|
934 { |
|
935 aOutputString.AppendLiteral("±"); |
|
936 glyphTextLen = 3; |
|
937 return true; |
|
938 } |
|
939 } |
|
940 |
|
941 // x^2 => x<sup>2</sup>, also handle powers x^-2, x^0.5 |
|
942 // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/ |
|
943 if |
|
944 ( |
|
945 text1 == '^' |
|
946 && |
|
947 ( |
|
948 nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) || |
|
949 text0 == ')' || text0 == ']' || text0 == '}' |
|
950 ) |
|
951 && |
|
952 ( |
|
953 (2 < aInLength && nsCRT::IsAsciiDigit(aInString[2])) || |
|
954 (3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3])) |
|
955 ) |
|
956 ) |
|
957 { |
|
958 // Find first non-digit |
|
959 int32_t delimPos = 3; // skip "^" and first digit (or '-') |
|
960 for (; delimPos < aInLength |
|
961 && |
|
962 ( |
|
963 nsCRT::IsAsciiDigit(aInString[delimPos]) || |
|
964 (aInString[delimPos] == '.' && delimPos + 1 < aInLength && |
|
965 nsCRT::IsAsciiDigit(aInString[delimPos + 1])) |
|
966 ); |
|
967 delimPos++) |
|
968 ; |
|
969 |
|
970 if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos])) |
|
971 { |
|
972 return false; |
|
973 } |
|
974 |
|
975 outputHTML.Truncate(); |
|
976 outputHTML += text0; |
|
977 outputHTML.AppendLiteral( |
|
978 "<sup class=\"moz-txt-sup\">" |
|
979 "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">" |
|
980 "^</span>"); |
|
981 |
|
982 aOutputString.Append(outputHTML); |
|
983 aOutputString.Append(&aInString[2], delimPos - 2); |
|
984 aOutputString.AppendLiteral("</sup>"); |
|
985 |
|
986 glyphTextLen = delimPos /* - 1 + 1 */ ; |
|
987 return true; |
|
988 } |
|
989 /* |
|
990 The following strings are not substituted: |
|
991 |TXT |HTML |Reason |
|
992 +------+---------+---------- |
|
993 -> ← Bug #454 |
|
994 => ⇐ dito |
|
995 <- → dito |
|
996 <= ⇒ dito |
|
997 (tm) ™ dito |
|
998 1/4 ¼ is triggered by 1/4 Part 1, 2/4 Part 2, ... |
|
999 3/4 ¾ dito |
|
1000 1/2 ½ similar |
|
1001 */ |
|
1002 return false; |
|
1003 } |
|
1004 |
|
1005 /*************************************************************************** |
|
1006 Library-internal Interface |
|
1007 ****************************************************************************/ |
|
1008 |
|
1009 mozTXTToHTMLConv::mozTXTToHTMLConv() |
|
1010 { |
|
1011 } |
|
1012 |
|
1013 mozTXTToHTMLConv::~mozTXTToHTMLConv() |
|
1014 { |
|
1015 } |
|
1016 |
|
1017 NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, |
|
1018 mozITXTToHTMLConv, |
|
1019 nsIStreamConverter, |
|
1020 nsIStreamListener, |
|
1021 nsIRequestObserver) |
|
1022 |
|
1023 int32_t |
|
1024 mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, |
|
1025 uint32_t& logLineStart) |
|
1026 { |
|
1027 int32_t result = 0; |
|
1028 int32_t lineLength = NS_strlen(line); |
|
1029 |
|
1030 bool moreCites = true; |
|
1031 while (moreCites) |
|
1032 { |
|
1033 /* E.g. the following lines count as quote: |
|
1034 |
|
1035 > text |
|
1036 //#ifdef QUOTE_RECOGNITION_AGGRESSIVE |
|
1037 >text |
|
1038 //#ifdef QUOTE_RECOGNITION_AGGRESSIVE |
|
1039 > text |
|
1040 ] text |
|
1041 USER> text |
|
1042 USER] text |
|
1043 //#endif |
|
1044 |
|
1045 logLineStart is the position of "t" in this example |
|
1046 */ |
|
1047 uint32_t i = logLineStart; |
|
1048 |
|
1049 #ifdef QUOTE_RECOGNITION_AGGRESSIVE |
|
1050 for (; int32_t(i) < lineLength && IsSpace(line[i]); i++) |
|
1051 ; |
|
1052 for (; int32_t(i) < lineLength && nsCRT::IsAsciiAlpha(line[i]) |
|
1053 && nsCRT::IsUpper(line[i]) ; i++) |
|
1054 ; |
|
1055 if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']')) |
|
1056 #else |
|
1057 if (int32_t(i) < lineLength && line[i] == '>') |
|
1058 #endif |
|
1059 { |
|
1060 i++; |
|
1061 if (int32_t(i) < lineLength && line[i] == ' ') |
|
1062 i++; |
|
1063 // sendmail/mbox |
|
1064 // Placed here for performance increase |
|
1065 const char16_t * indexString = &line[logLineStart]; |
|
1066 // here, |logLineStart < lineLength| is always true |
|
1067 uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString)); |
|
1068 if (Substring(indexString, |
|
1069 indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength), |
|
1070 nsCaseInsensitiveStringComparator())) |
|
1071 //XXX RFC2646 |
|
1072 moreCites = false; |
|
1073 else |
|
1074 { |
|
1075 result++; |
|
1076 logLineStart = i; |
|
1077 } |
|
1078 } |
|
1079 else |
|
1080 moreCites = false; |
|
1081 } |
|
1082 |
|
1083 return result; |
|
1084 } |
|
1085 |
|
1086 void |
|
1087 mozTXTToHTMLConv::ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString) |
|
1088 { |
|
1089 bool doURLs = 0 != (whattodo & kURLs); |
|
1090 bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution); |
|
1091 bool doStructPhrase = 0 != (whattodo & kStructPhrase); |
|
1092 |
|
1093 uint32_t structPhrase_strong = 0; // Number of currently open tags |
|
1094 uint32_t structPhrase_underline = 0; |
|
1095 uint32_t structPhrase_italic = 0; |
|
1096 uint32_t structPhrase_code = 0; |
|
1097 |
|
1098 nsAutoString outputHTML; // moved here for performance increase |
|
1099 |
|
1100 for(uint32_t i = 0; int32_t(i) < aInStringLength;) |
|
1101 { |
|
1102 if (doGlyphSubstitution) |
|
1103 { |
|
1104 int32_t glyphTextLen; |
|
1105 if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen)) |
|
1106 { |
|
1107 i += glyphTextLen; |
|
1108 continue; |
|
1109 } |
|
1110 } |
|
1111 |
|
1112 if (doStructPhrase) |
|
1113 { |
|
1114 const char16_t * newOffset = aInString; |
|
1115 int32_t newLength = aInStringLength; |
|
1116 if (i > 0 ) // skip the first element? |
|
1117 { |
|
1118 newOffset = &aInString[i-1]; |
|
1119 newLength = aInStringLength - i + 1; |
|
1120 } |
|
1121 |
|
1122 switch (aInString[i]) // Performance increase |
|
1123 { |
|
1124 case '*': |
|
1125 if (StructPhraseHit(newOffset, newLength, i == 0, |
|
1126 MOZ_UTF16("*"), 1, |
|
1127 "b", "class=\"moz-txt-star\"", |
|
1128 aOutString, structPhrase_strong)) |
|
1129 { |
|
1130 i++; |
|
1131 continue; |
|
1132 } |
|
1133 break; |
|
1134 case '/': |
|
1135 if (StructPhraseHit(newOffset, newLength, i == 0, |
|
1136 MOZ_UTF16("/"), 1, |
|
1137 "i", "class=\"moz-txt-slash\"", |
|
1138 aOutString, structPhrase_italic)) |
|
1139 { |
|
1140 i++; |
|
1141 continue; |
|
1142 } |
|
1143 break; |
|
1144 case '_': |
|
1145 if (StructPhraseHit(newOffset, newLength, i == 0, |
|
1146 MOZ_UTF16("_"), 1, |
|
1147 "span" /* <u> is deprecated */, |
|
1148 "class=\"moz-txt-underscore\"", |
|
1149 aOutString, structPhrase_underline)) |
|
1150 { |
|
1151 i++; |
|
1152 continue; |
|
1153 } |
|
1154 break; |
|
1155 case '|': |
|
1156 if (StructPhraseHit(newOffset, newLength, i == 0, |
|
1157 MOZ_UTF16("|"), 1, |
|
1158 "code", "class=\"moz-txt-verticalline\"", |
|
1159 aOutString, structPhrase_code)) |
|
1160 { |
|
1161 i++; |
|
1162 continue; |
|
1163 } |
|
1164 break; |
|
1165 } |
|
1166 } |
|
1167 |
|
1168 if (doURLs) |
|
1169 { |
|
1170 switch (aInString[i]) |
|
1171 { |
|
1172 case ':': |
|
1173 case '@': |
|
1174 case '.': |
|
1175 if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase |
|
1176 { |
|
1177 int32_t replaceBefore; |
|
1178 int32_t replaceAfter; |
|
1179 if (FindURL(aInString, aInStringLength, i, whattodo, |
|
1180 outputHTML, replaceBefore, replaceAfter) |
|
1181 && structPhrase_strong + structPhrase_italic + |
|
1182 structPhrase_underline + structPhrase_code == 0 |
|
1183 /* workaround for bug #19445 */ ) |
|
1184 { |
|
1185 aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore); |
|
1186 aOutString += outputHTML; |
|
1187 i += replaceAfter + 1; |
|
1188 continue; |
|
1189 } |
|
1190 } |
|
1191 break; |
|
1192 } //switch |
|
1193 } |
|
1194 |
|
1195 switch (aInString[i]) |
|
1196 { |
|
1197 // Special symbols |
|
1198 case '<': |
|
1199 case '>': |
|
1200 case '&': |
|
1201 EscapeChar(aInString[i], aOutString, false); |
|
1202 i++; |
|
1203 break; |
|
1204 // Normal characters |
|
1205 default: |
|
1206 aOutString += aInString[i]; |
|
1207 i++; |
|
1208 break; |
|
1209 } |
|
1210 } |
|
1211 } |
|
1212 |
|
1213 void |
|
1214 mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString) |
|
1215 { |
|
1216 // some common variables we were recalculating |
|
1217 // every time inside the for loop... |
|
1218 int32_t lengthOfInString = aInString.Length(); |
|
1219 const char16_t * uniBuffer = aInString.get(); |
|
1220 |
|
1221 #ifdef DEBUG_BenB_Perf |
|
1222 PRTime parsing_start = PR_IntervalNow(); |
|
1223 #endif |
|
1224 |
|
1225 // Look for simple entities not included in a tags and scan them. |
|
1226 /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>") |
|
1227 or in a tag ("<!--[...]-->"). |
|
1228 Unescape the rest (text between tags) and pass it to ScanTXT. */ |
|
1229 for (int32_t i = 0; i < lengthOfInString;) |
|
1230 { |
|
1231 if (aInString[i] == '<') // html tag |
|
1232 { |
|
1233 uint32_t start = uint32_t(i); |
|
1234 if (nsCRT::ToLower((char)aInString[uint32_t(i) + 1]) == 'a') |
|
1235 // if a tag, skip until </a> |
|
1236 { |
|
1237 i = aInString.Find("</a>", true, i); |
|
1238 if (i == kNotFound) |
|
1239 i = lengthOfInString; |
|
1240 else |
|
1241 i += 4; |
|
1242 } |
|
1243 else if (aInString[uint32_t(i) + 1] == '!' && aInString[uint32_t(i) + 2] == '-' && |
|
1244 aInString[uint32_t(i) + 3] == '-') |
|
1245 //if out-commended code, skip until --> |
|
1246 { |
|
1247 i = aInString.Find("-->", false, i); |
|
1248 if (i == kNotFound) |
|
1249 i = lengthOfInString; |
|
1250 else |
|
1251 i += 3; |
|
1252 |
|
1253 } |
|
1254 else // just skip tag (attributes etc.) |
|
1255 { |
|
1256 i = aInString.FindChar('>', i); |
|
1257 if (i == kNotFound) |
|
1258 i = lengthOfInString; |
|
1259 else |
|
1260 i++; |
|
1261 } |
|
1262 aOutString.Append(&uniBuffer[start], uint32_t(i) - start); |
|
1263 } |
|
1264 else |
|
1265 { |
|
1266 uint32_t start = uint32_t(i); |
|
1267 i = aInString.FindChar('<', i); |
|
1268 if (i == kNotFound) |
|
1269 i = lengthOfInString; |
|
1270 |
|
1271 nsString tempString; |
|
1272 tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate)); |
|
1273 UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString); |
|
1274 ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString); |
|
1275 } |
|
1276 } |
|
1277 |
|
1278 #ifdef DEBUG_BenB_Perf |
|
1279 printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start)); |
|
1280 #endif |
|
1281 } |
|
1282 |
|
1283 /**************************************************************************** |
|
1284 XPCOM Interface |
|
1285 *****************************************************************************/ |
|
1286 |
|
1287 NS_IMETHODIMP |
|
1288 mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream, |
|
1289 const char *aFromType, |
|
1290 const char *aToType, |
|
1291 nsISupports *aCtxt, nsIInputStream **_retval) |
|
1292 { |
|
1293 return NS_ERROR_NOT_IMPLEMENTED; |
|
1294 } |
|
1295 |
|
1296 NS_IMETHODIMP |
|
1297 mozTXTToHTMLConv::AsyncConvertData(const char *aFromType, |
|
1298 const char *aToType, |
|
1299 nsIStreamListener *aListener, nsISupports *aCtxt) { |
|
1300 return NS_ERROR_NOT_IMPLEMENTED; |
|
1301 } |
|
1302 |
|
1303 NS_IMETHODIMP |
|
1304 mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt, |
|
1305 nsIInputStream *inStr, uint64_t sourceOffset, |
|
1306 uint32_t count) |
|
1307 { |
|
1308 return NS_ERROR_NOT_IMPLEMENTED; |
|
1309 } |
|
1310 |
|
1311 NS_IMETHODIMP |
|
1312 mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt) |
|
1313 { |
|
1314 return NS_ERROR_NOT_IMPLEMENTED; |
|
1315 } |
|
1316 |
|
1317 NS_IMETHODIMP |
|
1318 mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt, |
|
1319 nsresult aStatus) |
|
1320 { |
|
1321 return NS_ERROR_NOT_IMPLEMENTED; |
|
1322 } |
|
1323 |
|
1324 NS_IMETHODIMP |
|
1325 mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, uint32_t *logLineStart, |
|
1326 uint32_t *_retval) |
|
1327 { |
|
1328 if (!logLineStart || !_retval || !line) |
|
1329 return NS_ERROR_NULL_POINTER; |
|
1330 *_retval = CiteLevelTXT(line, *logLineStart); |
|
1331 return NS_OK; |
|
1332 } |
|
1333 |
|
1334 NS_IMETHODIMP |
|
1335 mozTXTToHTMLConv::ScanTXT(const char16_t *text, uint32_t whattodo, |
|
1336 char16_t **_retval) |
|
1337 { |
|
1338 NS_ENSURE_ARG(text); |
|
1339 |
|
1340 // FIX ME!!! |
|
1341 nsString outString; |
|
1342 int32_t inLength = NS_strlen(text); |
|
1343 // by setting a large capacity up front, we save time |
|
1344 // when appending characters to the output string because we don't |
|
1345 // need to reallocate and re-copy the characters already in the out String. |
|
1346 NS_ASSERTION(inLength, "ScanTXT passed 0 length string"); |
|
1347 if (inLength == 0) { |
|
1348 *_retval = NS_strdup(text); |
|
1349 return NS_OK; |
|
1350 } |
|
1351 |
|
1352 outString.SetCapacity(uint32_t(inLength * growthRate)); |
|
1353 ScanTXT(text, inLength, whattodo, outString); |
|
1354 |
|
1355 *_retval = ToNewUnicode(outString); |
|
1356 return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; |
|
1357 } |
|
1358 |
|
1359 NS_IMETHODIMP |
|
1360 mozTXTToHTMLConv::ScanHTML(const char16_t *text, uint32_t whattodo, |
|
1361 char16_t **_retval) |
|
1362 { |
|
1363 NS_ENSURE_ARG(text); |
|
1364 |
|
1365 // FIX ME!!! |
|
1366 nsString outString; |
|
1367 nsString inString (text); // look at this nasty extra copy of the entire input buffer! |
|
1368 outString.SetCapacity(uint32_t(inString.Length() * growthRate)); |
|
1369 |
|
1370 ScanHTML(inString, whattodo, outString); |
|
1371 *_retval = ToNewUnicode(outString); |
|
1372 return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; |
|
1373 } |
|
1374 |
|
1375 nsresult |
|
1376 MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv) |
|
1377 { |
|
1378 NS_PRECONDITION(aConv != nullptr, "null ptr"); |
|
1379 if (!aConv) |
|
1380 return NS_ERROR_NULL_POINTER; |
|
1381 |
|
1382 *aConv = new mozTXTToHTMLConv(); |
|
1383 if (!*aConv) |
|
1384 return NS_ERROR_OUT_OF_MEMORY; |
|
1385 |
|
1386 NS_ADDREF(*aConv); |
|
1387 // return (*aConv)->Init(); |
|
1388 return NS_OK; |
|
1389 } |