1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1389 @@ 1.4 +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "mozTXTToHTMLConv.h" 1.10 +#include "nsNetUtil.h" 1.11 +#include "nsUnicharUtils.h" 1.12 +#include "nsCRT.h" 1.13 +#include "nsIExternalProtocolHandler.h" 1.14 +#include "nsIIOService.h" 1.15 + 1.16 +#include <algorithm> 1.17 + 1.18 +#ifdef DEBUG_BenB_Perf 1.19 +#include "prtime.h" 1.20 +#include "prinrval.h" 1.21 +#endif 1.22 + 1.23 +const double growthRate = 1.2; 1.24 + 1.25 +// Bug 183111, editor now replaces multiple spaces with leading 1.26 +// 0xA0's and a single ending space, so need to treat 0xA0's as spaces. 1.27 +// 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)" 1.28 +// Also recognize the Japanese ideographic space 0x3000 as a space. 1.29 +static inline bool IsSpace(const char16_t aChar) 1.30 +{ 1.31 + return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000); 1.32 +} 1.33 + 1.34 +// Escape Char will take ch, escape it and append the result to 1.35 +// aStringToAppendTo 1.36 +void 1.37 +mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo, 1.38 + bool inAttribute) 1.39 +{ 1.40 + switch (ch) 1.41 + { 1.42 + case '<': 1.43 + aStringToAppendTo.AppendLiteral("<"); 1.44 + break; 1.45 + case '>': 1.46 + aStringToAppendTo.AppendLiteral(">"); 1.47 + break; 1.48 + case '&': 1.49 + aStringToAppendTo.AppendLiteral("&"); 1.50 + break; 1.51 + case '"': 1.52 + if (inAttribute) 1.53 + { 1.54 + aStringToAppendTo.AppendLiteral("""); 1.55 + break; 1.56 + } 1.57 + // else fall through 1.58 + default: 1.59 + aStringToAppendTo += ch; 1.60 + } 1.61 + 1.62 + return; 1.63 +} 1.64 + 1.65 +// EscapeStr takes the passed in string and 1.66 +// escapes it IN PLACE. 1.67 +void 1.68 +mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute) 1.69 +{ 1.70 + // the replace substring routines 1.71 + // don't seem to work if you have a character 1.72 + // in the in string that is also in the replacement 1.73 + // string! =( 1.74 + //aInString.ReplaceSubstring("&", "&"); 1.75 + //aInString.ReplaceSubstring("<", "<"); 1.76 + //aInString.ReplaceSubstring(">", ">"); 1.77 + for (uint32_t i = 0; i < aInString.Length();) 1.78 + { 1.79 + switch (aInString[i]) 1.80 + { 1.81 + case '<': 1.82 + aInString.Cut(i, 1); 1.83 + aInString.Insert(NS_LITERAL_STRING("<"), i); 1.84 + i += 4; // skip past the integers we just added 1.85 + break; 1.86 + case '>': 1.87 + aInString.Cut(i, 1); 1.88 + aInString.Insert(NS_LITERAL_STRING(">"), i); 1.89 + i += 4; // skip past the integers we just added 1.90 + break; 1.91 + case '&': 1.92 + aInString.Cut(i, 1); 1.93 + aInString.Insert(NS_LITERAL_STRING("&"), i); 1.94 + i += 5; // skip past the integers we just added 1.95 + break; 1.96 + case '"': 1.97 + if (inAttribute) 1.98 + { 1.99 + aInString.Cut(i, 1); 1.100 + aInString.Insert(NS_LITERAL_STRING("""), i); 1.101 + i += 6; 1.102 + break; 1.103 + } 1.104 + // else fall through 1.105 + default: 1.106 + i++; 1.107 + } 1.108 + } 1.109 +} 1.110 + 1.111 +void 1.112 +mozTXTToHTMLConv::UnescapeStr(const char16_t * aInString, int32_t aStartPos, int32_t aLength, nsString& aOutString) 1.113 +{ 1.114 + const char16_t * subString = nullptr; 1.115 + for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;) 1.116 + { 1.117 + int32_t remainingChars = i - aStartPos; 1.118 + if (aInString[i] == '&') 1.119 + { 1.120 + subString = &aInString[i]; 1.121 + if (!nsCRT::strncmp(subString, MOZ_UTF16("<"), std::min(4, aLength - remainingChars))) 1.122 + { 1.123 + aOutString.Append(char16_t('<')); 1.124 + i += 4; 1.125 + } 1.126 + else if (!nsCRT::strncmp(subString, MOZ_UTF16(">"), std::min(4, aLength - remainingChars))) 1.127 + { 1.128 + aOutString.Append(char16_t('>')); 1.129 + i += 4; 1.130 + } 1.131 + else if (!nsCRT::strncmp(subString, MOZ_UTF16("&"), std::min(5, aLength - remainingChars))) 1.132 + { 1.133 + aOutString.Append(char16_t('&')); 1.134 + i += 5; 1.135 + } 1.136 + else if (!nsCRT::strncmp(subString, MOZ_UTF16("""), std::min(6, aLength - remainingChars))) 1.137 + { 1.138 + aOutString.Append(char16_t('"')); 1.139 + i += 6; 1.140 + } 1.141 + else 1.142 + { 1.143 + aOutString += aInString[i]; 1.144 + i++; 1.145 + } 1.146 + } 1.147 + else 1.148 + { 1.149 + aOutString += aInString[i]; 1.150 + i++; 1.151 + } 1.152 + } 1.153 +} 1.154 + 1.155 +void 1.156 +mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, 1.157 + const uint32_t pos, nsString& aOutString) 1.158 +{ 1.159 + NS_ASSERTION(int32_t(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851"); 1.160 + if (int32_t(pos) >= aInLength) 1.161 + return; 1.162 + 1.163 + if (aInString[pos] == '@') 1.164 + { 1.165 + // only pre-pend a mailto url if the string contains a .domain in it.. 1.166 + //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm" 1.167 + nsDependentString inString(aInString, aInLength); 1.168 + if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign.... 1.169 + { 1.170 + aOutString.AssignLiteral("mailto:"); 1.171 + aOutString += aInString; 1.172 + } 1.173 + } 1.174 + else if (aInString[pos] == '.') 1.175 + { 1.176 + if (ItMatchesDelimited(aInString, aInLength, 1.177 + MOZ_UTF16("www."), 4, LT_IGNORE, LT_IGNORE)) 1.178 + { 1.179 + aOutString.AssignLiteral("http://"); 1.180 + aOutString += aInString; 1.181 + } 1.182 + else if (ItMatchesDelimited(aInString,aInLength, MOZ_UTF16("ftp."), 4, LT_IGNORE, LT_IGNORE)) 1.183 + { 1.184 + aOutString.AssignLiteral("ftp://"); 1.185 + aOutString += aInString; 1.186 + } 1.187 + } 1.188 +} 1.189 + 1.190 +bool 1.191 +mozTXTToHTMLConv::FindURLStart(const char16_t * aInString, int32_t aInLength, 1.192 + const uint32_t pos, const modetype check, 1.193 + uint32_t& start) 1.194 +{ 1.195 + switch(check) 1.196 + { // no breaks, because end of blocks is never reached 1.197 + case RFC1738: 1.198 + { 1.199 + if (!nsCRT::strncmp(&aInString[std::max(int32_t(pos - 4), 0)], MOZ_UTF16("<URL:"), 5)) 1.200 + { 1.201 + start = pos + 1; 1.202 + return true; 1.203 + } 1.204 + else 1.205 + return false; 1.206 + } 1.207 + case RFC2396E: 1.208 + { 1.209 + nsString temp(aInString, aInLength); 1.210 + int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(MOZ_UTF16("<>\""), pos - 1); 1.211 + if (i != kNotFound && (temp[uint32_t(i)] == '<' || 1.212 + temp[uint32_t(i)] == '"')) 1.213 + { 1.214 + start = uint32_t(++i); 1.215 + return start < pos; 1.216 + } 1.217 + else 1.218 + return false; 1.219 + } 1.220 + case freetext: 1.221 + { 1.222 + int32_t i = pos - 1; 1.223 + for (; i >= 0 && ( 1.224 + nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) || 1.225 + nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) || 1.226 + aInString[uint32_t(i)] == '+' || 1.227 + aInString[uint32_t(i)] == '-' || 1.228 + aInString[uint32_t(i)] == '.' 1.229 + ); i--) 1.230 + ; 1.231 + if (++i >= 0 && uint32_t(i) < pos && nsCRT::IsAsciiAlpha(aInString[uint32_t(i)])) 1.232 + { 1.233 + start = uint32_t(i); 1.234 + return true; 1.235 + } 1.236 + else 1.237 + return false; 1.238 + } 1.239 + case abbreviated: 1.240 + { 1.241 + int32_t i = pos - 1; 1.242 + // This disallows non-ascii-characters for email. 1.243 + // Currently correct, but revisit later after standards changed. 1.244 + bool isEmail = aInString[pos] == (char16_t)'@'; 1.245 + // These chars mark the start of the URL 1.246 + for (; i >= 0 1.247 + && aInString[uint32_t(i)] != '>' && aInString[uint32_t(i)] != '<' 1.248 + && aInString[uint32_t(i)] != '"' && aInString[uint32_t(i)] != '\'' 1.249 + && aInString[uint32_t(i)] != '`' && aInString[uint32_t(i)] != ',' 1.250 + && aInString[uint32_t(i)] != '{' && aInString[uint32_t(i)] != '[' 1.251 + && aInString[uint32_t(i)] != '(' && aInString[uint32_t(i)] != '|' 1.252 + && aInString[uint32_t(i)] != '\\' 1.253 + && !IsSpace(aInString[uint32_t(i)]) 1.254 + && (!isEmail || nsCRT::IsAscii(aInString[uint32_t(i)])) 1.255 + ; i--) 1.256 + ; 1.257 + if 1.258 + ( 1.259 + ++i >= 0 && uint32_t(i) < pos 1.260 + && 1.261 + ( 1.262 + nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) || 1.263 + nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) 1.264 + ) 1.265 + ) 1.266 + { 1.267 + start = uint32_t(i); 1.268 + return true; 1.269 + } 1.270 + else 1.271 + return false; 1.272 + } 1.273 + default: 1.274 + return false; 1.275 + } //switch 1.276 +} 1.277 + 1.278 +bool 1.279 +mozTXTToHTMLConv::FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos, 1.280 + const modetype check, const uint32_t start, uint32_t& end) 1.281 +{ 1.282 + switch(check) 1.283 + { // no breaks, because end of blocks is never reached 1.284 + case RFC1738: 1.285 + case RFC2396E: 1.286 + { 1.287 + nsString temp(aInString, aInStringLength); 1.288 + 1.289 + int32_t i = temp.FindCharInSet(MOZ_UTF16("<>\""), pos + 1); 1.290 + if (i != kNotFound && temp[uint32_t(i--)] == 1.291 + (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"')) 1.292 + { 1.293 + end = uint32_t(i); 1.294 + return end > pos; 1.295 + } 1.296 + return false; 1.297 + } 1.298 + case freetext: 1.299 + case abbreviated: 1.300 + { 1.301 + uint32_t i = pos + 1; 1.302 + bool isEmail = aInString[pos] == (char16_t)'@'; 1.303 + bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL 1.304 + bool seenOpeningSquareBracket = false; // there is a '[' earlier in the URL 1.305 + for (; int32_t(i) < aInStringLength; i++) 1.306 + { 1.307 + // These chars mark the end of the URL 1.308 + if (aInString[i] == '>' || aInString[i] == '<' || 1.309 + aInString[i] == '"' || aInString[i] == '`' || 1.310 + aInString[i] == '}' || aInString[i] == '{' || 1.311 + aInString[i] == '|' || 1.312 + (aInString[i] == ')' && !seenOpeningParenthesis) || 1.313 + (aInString[i] == ']' && !seenOpeningSquareBracket) || 1.314 + // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo. 1.315 + (aInString[i] == '[' && i > 2 && 1.316 + (aInString[i - 1] != '/' || aInString[i - 2] != '/')) || 1.317 + IsSpace(aInString[i])) 1.318 + break; 1.319 + // Disallow non-ascii-characters for email. 1.320 + // Currently correct, but revisit later after standards changed. 1.321 + if (isEmail && ( 1.322 + aInString[i] == '(' || aInString[i] == '\'' || 1.323 + !nsCRT::IsAscii(aInString[i]))) 1.324 + break; 1.325 + if (aInString[i] == '(') 1.326 + seenOpeningParenthesis = true; 1.327 + if (aInString[i] == '[') 1.328 + seenOpeningSquareBracket = true; 1.329 + } 1.330 + // These chars are allowed in the middle of the URL, but not at end. 1.331 + // Technically they are, but are used in normal text after the URL. 1.332 + while (--i > pos && ( 1.333 + aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' || 1.334 + aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' || 1.335 + aInString[i] == ':' || aInString[i] == '\'' 1.336 + )) 1.337 + ; 1.338 + if (i > pos) 1.339 + { 1.340 + end = i; 1.341 + return true; 1.342 + } 1.343 + return false; 1.344 + } 1.345 + default: 1.346 + return false; 1.347 + } //switch 1.348 +} 1.349 + 1.350 +void 1.351 +mozTXTToHTMLConv::CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, 1.352 + const uint32_t pos, const uint32_t whathasbeendone, 1.353 + const modetype check, const uint32_t start, const uint32_t end, 1.354 + nsString& txtURL, nsString& desc, 1.355 + int32_t& replaceBefore, int32_t& replaceAfter) 1.356 +{ 1.357 + uint32_t descstart = start; 1.358 + switch(check) 1.359 + { 1.360 + case RFC1738: 1.361 + { 1.362 + descstart = start - 5; 1.363 + desc.Append(&aInString[descstart], end - descstart + 2); // include "<URL:" and ">" 1.364 + replaceAfter = end - pos + 1; 1.365 + } break; 1.366 + case RFC2396E: 1.367 + { 1.368 + descstart = start - 1; 1.369 + desc.Append(&aInString[descstart], end - descstart + 2); // include brackets 1.370 + replaceAfter = end - pos + 1; 1.371 + } break; 1.372 + case freetext: 1.373 + case abbreviated: 1.374 + { 1.375 + descstart = start; 1.376 + desc.Append(&aInString[descstart], end - start + 1); // don't include brackets 1.377 + replaceAfter = end - pos; 1.378 + } break; 1.379 + default: break; 1.380 + } //switch 1.381 + 1.382 + EscapeStr(desc, false); 1.383 + 1.384 + txtURL.Append(&aInString[start], end - start + 1); 1.385 + txtURL.StripWhitespace(); 1.386 + 1.387 + // FIX ME 1.388 + nsAutoString temp2; 1.389 + ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2); 1.390 + replaceBefore = temp2.Length(); 1.391 + return; 1.392 +} 1.393 + 1.394 +bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL) 1.395 +{ 1.396 + if (!mIOService) 1.397 + return false; 1.398 + 1.399 + nsAutoCString scheme; 1.400 + nsresult rv = mIOService->ExtractScheme(aURL, scheme); 1.401 + if(NS_FAILED(rv)) 1.402 + return false; 1.403 + 1.404 + // Get the handler for this scheme. 1.405 + nsCOMPtr<nsIProtocolHandler> handler; 1.406 + rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler)); 1.407 + if(NS_FAILED(rv)) 1.408 + return false; 1.409 + 1.410 + // Is it an external protocol handler? If not, linkify it. 1.411 + nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler); 1.412 + if (!externalHandler) 1.413 + return true; // handler is built-in, linkify it! 1.414 + 1.415 + // If external app exists for the scheme then linkify it. 1.416 + bool exists; 1.417 + rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists); 1.418 + return(NS_SUCCEEDED(rv) && exists); 1.419 +} 1.420 + 1.421 +bool 1.422 +mozTXTToHTMLConv::CheckURLAndCreateHTML( 1.423 + const nsString& txtURL, const nsString& desc, const modetype mode, 1.424 + nsString& outputHTML) 1.425 +{ 1.426 + // Create *uri from txtURL 1.427 + nsCOMPtr<nsIURI> uri; 1.428 + nsresult rv; 1.429 + // Lazily initialize mIOService 1.430 + if (!mIOService) 1.431 + { 1.432 + mIOService = do_GetIOService(); 1.433 + 1.434 + if (!mIOService) 1.435 + return false; 1.436 + } 1.437 + 1.438 + // See if the url should be linkified. 1.439 + NS_ConvertUTF16toUTF8 utf8URL(txtURL); 1.440 + if (!ShouldLinkify(utf8URL)) 1.441 + return false; 1.442 + 1.443 + // it would be faster if we could just check to see if there is a protocol 1.444 + // handler for the url and return instead of actually trying to create a url... 1.445 + rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri)); 1.446 + 1.447 + // Real work 1.448 + if (NS_SUCCEEDED(rv) && uri) 1.449 + { 1.450 + outputHTML.AssignLiteral("<a class=\"moz-txt-link-"); 1.451 + switch(mode) 1.452 + { 1.453 + case RFC1738: 1.454 + outputHTML.AppendLiteral("rfc1738"); 1.455 + break; 1.456 + case RFC2396E: 1.457 + outputHTML.AppendLiteral("rfc2396E"); 1.458 + break; 1.459 + case freetext: 1.460 + outputHTML.AppendLiteral("freetext"); 1.461 + break; 1.462 + case abbreviated: 1.463 + outputHTML.AppendLiteral("abbreviated"); 1.464 + break; 1.465 + default: break; 1.466 + } 1.467 + nsAutoString escapedURL(txtURL); 1.468 + EscapeStr(escapedURL, true); 1.469 + 1.470 + outputHTML.AppendLiteral("\" href=\""); 1.471 + outputHTML += escapedURL; 1.472 + outputHTML.AppendLiteral("\">"); 1.473 + outputHTML += desc; 1.474 + outputHTML.AppendLiteral("</a>"); 1.475 + return true; 1.476 + } 1.477 + else 1.478 + return false; 1.479 +} 1.480 + 1.481 +NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t * aInString, int32_t aInLength, int32_t aPos, int32_t * aStartPos, int32_t * aEndPos) 1.482 +{ 1.483 + // call FindURL on the passed in string 1.484 + nsAutoString outputHTML; // we'll ignore the generated output HTML 1.485 + 1.486 + *aStartPos = -1; 1.487 + *aEndPos = -1; 1.488 + 1.489 + FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos); 1.490 + 1.491 + return NS_OK; 1.492 +} 1.493 + 1.494 +bool 1.495 +mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos, 1.496 + const uint32_t whathasbeendone, 1.497 + nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter) 1.498 +{ 1.499 + enum statetype {unchecked, invalid, startok, endok, success}; 1.500 + static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated}; 1.501 + 1.502 + statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode 1.503 + /* I don't like this abuse of enums as index for the array, 1.504 + but I don't know a better method */ 1.505 + 1.506 + // Define, which modes to check 1.507 + /* all modes but abbreviated are checked for text[pos] == ':', 1.508 + only abbreviated for '.', RFC2396E and abbreviated for '@' */ 1.509 + for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode; 1.510 + iState = modetype(iState + 1)) 1.511 + state[iState] = aInString[pos] == ':' ? unchecked : invalid; 1.512 + switch (aInString[pos]) 1.513 + { 1.514 + case '@': 1.515 + state[RFC2396E] = unchecked; 1.516 + // no break here 1.517 + case '.': 1.518 + state[abbreviated] = unchecked; 1.519 + break; 1.520 + case ':': 1.521 + state[abbreviated] = invalid; 1.522 + break; 1.523 + default: 1.524 + break; 1.525 + } 1.526 + 1.527 + // Test, first successful mode wins, sequence defined by |ranking| 1.528 + int32_t iCheck = 0; // the currently tested modetype 1.529 + modetype check = ranking[iCheck]; 1.530 + for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success; 1.531 + iCheck++) 1.532 + /* check state from last run. 1.533 + If this is the first, check this one, which isn't = success yet */ 1.534 + { 1.535 + check = ranking[iCheck]; 1.536 + 1.537 + uint32_t start, end; 1.538 + 1.539 + if (state[check] == unchecked) 1.540 + if (FindURLStart(aInString, aInLength, pos, check, start)) 1.541 + state[check] = startok; 1.542 + 1.543 + if (state[check] == startok) 1.544 + if (FindURLEnd(aInString, aInLength, pos, check, start, end)) 1.545 + state[check] = endok; 1.546 + 1.547 + if (state[check] == endok) 1.548 + { 1.549 + nsAutoString txtURL, desc; 1.550 + int32_t resultReplaceBefore, resultReplaceAfter; 1.551 + 1.552 + CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end, 1.553 + txtURL, desc, 1.554 + resultReplaceBefore, resultReplaceAfter); 1.555 + 1.556 + if (aInString[pos] != ':') 1.557 + { 1.558 + nsAutoString temp = txtURL; 1.559 + txtURL.SetLength(0); 1.560 + CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL); 1.561 + } 1.562 + 1.563 + if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check, 1.564 + outputHTML)) 1.565 + { 1.566 + replaceBefore = resultReplaceBefore; 1.567 + replaceAfter = resultReplaceAfter; 1.568 + state[check] = success; 1.569 + } 1.570 + } // if 1.571 + } // for 1.572 + return state[check] == success; 1.573 +} 1.574 + 1.575 +bool 1.576 +mozTXTToHTMLConv::ItMatchesDelimited(const char16_t * aInString, 1.577 + int32_t aInLength, const char16_t* rep, int32_t aRepLen, 1.578 + LIMTYPE before, LIMTYPE after) 1.579 +{ 1.580 + 1.581 + // this little method gets called a LOT. I found we were spending a 1.582 + // lot of time just calculating the length of the variable "rep" 1.583 + // over and over again every time we called it. So we're now passing 1.584 + // an integer in here. 1.585 + int32_t textLen = aInLength; 1.586 + 1.587 + if 1.588 + ( 1.589 + ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER)) 1.590 + && textLen < aRepLen) || 1.591 + ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER)) 1.592 + && textLen < aRepLen + 1) || 1.593 + (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER 1.594 + && textLen < aRepLen + 2) 1.595 + ) 1.596 + return false; 1.597 + 1.598 + char16_t text0 = aInString[0]; 1.599 + char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)]; 1.600 + 1.601 + if 1.602 + ( 1.603 + (before == LT_ALPHA 1.604 + && !nsCRT::IsAsciiAlpha(text0)) || 1.605 + (before == LT_DIGIT 1.606 + && !nsCRT::IsAsciiDigit(text0)) || 1.607 + (before == LT_DELIMITER 1.608 + && 1.609 + ( 1.610 + nsCRT::IsAsciiAlpha(text0) || 1.611 + nsCRT::IsAsciiDigit(text0) || 1.612 + text0 == *rep 1.613 + )) || 1.614 + (after == LT_ALPHA 1.615 + && !nsCRT::IsAsciiAlpha(textAfterPos)) || 1.616 + (after == LT_DIGIT 1.617 + && !nsCRT::IsAsciiDigit(textAfterPos)) || 1.618 + (after == LT_DELIMITER 1.619 + && 1.620 + ( 1.621 + nsCRT::IsAsciiAlpha(textAfterPos) || 1.622 + nsCRT::IsAsciiDigit(textAfterPos) || 1.623 + textAfterPos == *rep 1.624 + )) || 1.625 + !Substring(Substring(aInString, aInString+aInLength), 1.626 + (before == LT_IGNORE ? 0 : 1), 1.627 + aRepLen).Equals(Substring(rep, rep+aRepLen), 1.628 + nsCaseInsensitiveStringComparator()) 1.629 + ) 1.630 + return false; 1.631 + 1.632 + return true; 1.633 +} 1.634 + 1.635 +uint32_t 1.636 +mozTXTToHTMLConv::NumberOfMatches(const char16_t * aInString, int32_t aInStringLength, 1.637 + const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after) 1.638 +{ 1.639 + uint32_t result = 0; 1.640 + 1.641 + for (int32_t i = 0; i < aInStringLength; i++) 1.642 + { 1.643 + const char16_t * indexIntoString = &aInString[i]; 1.644 + if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after)) 1.645 + result++; 1.646 + } 1.647 + return result; 1.648 +} 1.649 + 1.650 + 1.651 +// NOTE: the converted html for the phrase is appended to aOutString 1.652 +// tagHTML and attributeHTML are plain ASCII (literal strings, in fact) 1.653 +bool 1.654 +mozTXTToHTMLConv::StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0, 1.655 + const char16_t* tagTXT, int32_t aTagTXTLen, 1.656 + const char* tagHTML, const char* attributeHTML, 1.657 + nsString& aOutString, uint32_t& openTags) 1.658 +{ 1.659 + /* We're searching for the following pattern: 1.660 + LT_DELIMITER - "*" - ALPHA - 1.661 + [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER. 1.662 + <strong> is only inserted, if existence of a pair could be verified 1.663 + We use the first opening/closing tag, if we can choose */ 1.664 + 1.665 + const char16_t * newOffset = aInString; 1.666 + int32_t newLength = aInStringLength; 1.667 + if (!col0) // skip the first element? 1.668 + { 1.669 + newOffset = &aInString[1]; 1.670 + newLength = aInStringLength - 1; 1.671 + } 1.672 + 1.673 + // opening tag 1.674 + if 1.675 + ( 1.676 + ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, 1.677 + (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag 1.678 + && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen, 1.679 + LT_ALPHA, LT_DELIMITER) // remaining closing tags 1.680 + > openTags 1.681 + ) 1.682 + { 1.683 + openTags++; 1.684 + aOutString.AppendLiteral("<"); 1.685 + aOutString.AppendASCII(tagHTML); 1.686 + aOutString.Append(char16_t(' ')); 1.687 + aOutString.AppendASCII(attributeHTML); 1.688 + aOutString.AppendLiteral("><span class=\"moz-txt-tag\">"); 1.689 + aOutString.Append(tagTXT); 1.690 + aOutString.AppendLiteral("</span>"); 1.691 + return true; 1.692 + } 1.693 + 1.694 + // closing tag 1.695 + else if (openTags > 0 1.696 + && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER)) 1.697 + { 1.698 + openTags--; 1.699 + aOutString.AppendLiteral("<span class=\"moz-txt-tag\">"); 1.700 + aOutString.Append(tagTXT); 1.701 + aOutString.AppendLiteral("</span></"); 1.702 + aOutString.AppendASCII(tagHTML); 1.703 + aOutString.Append(char16_t('>')); 1.704 + return true; 1.705 + } 1.706 + 1.707 + return false; 1.708 +} 1.709 + 1.710 + 1.711 +bool 1.712 +mozTXTToHTMLConv::SmilyHit(const char16_t * aInString, int32_t aLength, bool col0, 1.713 + const char* tagTXT, const char* imageName, 1.714 + nsString& outputHTML, int32_t& glyphTextLen) 1.715 +{ 1.716 + if ( !aInString || !tagTXT || !imageName ) 1.717 + return false; 1.718 + 1.719 + int32_t tagLen = strlen(tagTXT); 1.720 + 1.721 + uint32_t delim = (col0 ? 0 : 1) + tagLen; 1.722 + 1.723 + if 1.724 + ( 1.725 + (col0 || IsSpace(aInString[0])) 1.726 + && 1.727 + ( 1.728 + aLength <= int32_t(delim) || 1.729 + IsSpace(aInString[delim]) || 1.730 + (aLength > int32_t(delim + 1) 1.731 + && 1.732 + ( 1.733 + aInString[delim] == '.' || 1.734 + aInString[delim] == ',' || 1.735 + aInString[delim] == ';' || 1.736 + aInString[delim] == '8' || 1.737 + aInString[delim] == '>' || 1.738 + aInString[delim] == '!' || 1.739 + aInString[delim] == '?' 1.740 + ) 1.741 + && IsSpace(aInString[delim + 1])) 1.742 + ) 1.743 + && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen, 1.744 + col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE) 1.745 + // Note: tests at different pos for LT_IGNORE and LT_DELIMITER 1.746 + ) 1.747 + { 1.748 + if (!col0) 1.749 + { 1.750 + outputHTML.Truncate(); 1.751 + outputHTML.Append(char16_t(' ')); 1.752 + } 1.753 + 1.754 + outputHTML.AppendLiteral("<span class=\""); // <span class=" 1.755 + AppendASCIItoUTF16(imageName, outputHTML); // e.g. smiley-frown 1.756 + outputHTML.AppendLiteral("\" title=\""); // " title=" 1.757 + AppendASCIItoUTF16(tagTXT, outputHTML); // smiley tooltip 1.758 + outputHTML.AppendLiteral("\"><span>"); // "><span> 1.759 + AppendASCIItoUTF16(tagTXT, outputHTML); // original text 1.760 + outputHTML.AppendLiteral("</span></span>"); // </span></span> 1.761 + glyphTextLen = (col0 ? 0 : 1) + tagLen; 1.762 + return true; 1.763 + } 1.764 + 1.765 + return false; 1.766 +} 1.767 + 1.768 +// the glyph is appended to aOutputString instead of the original string... 1.769 +bool 1.770 +mozTXTToHTMLConv::GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0, 1.771 + nsString& aOutputString, int32_t& glyphTextLen) 1.772 +{ 1.773 + char16_t text0 = aInString[0]; 1.774 + char16_t text1 = aInString[1]; 1.775 + char16_t firstChar = (col0 ? text0 : text1); 1.776 + 1.777 + // temporary variable used to store the glyph html text 1.778 + nsAutoString outputHTML; 1.779 + bool bTestSmilie; 1.780 + bool bArg = false; 1.781 + int i; 1.782 + 1.783 + // refactor some of this mess to avoid code duplication and speed execution a bit 1.784 + // there are two cases that need to be tried one after another. To avoid a lot of 1.785 + // duplicate code, rolling into a loop 1.786 + 1.787 + i = 0; 1.788 + while ( i < 2 ) 1.789 + { 1.790 + bTestSmilie = false; 1.791 + if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O')) 1.792 + { 1.793 + // first test passed 1.794 + 1.795 + bTestSmilie = true; 1.796 + bArg = col0; 1.797 + } 1.798 + if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) ) 1.799 + { 1.800 + // second test passed 1.801 + 1.802 + bTestSmilie = true; 1.803 + bArg = false; 1.804 + } 1.805 + if ( bTestSmilie && ( 1.806 + SmilyHit(aInString, aInLength, bArg, 1.807 + ":-)", 1.808 + "moz-smiley-s1", // smile 1.809 + outputHTML, glyphTextLen) || 1.810 + 1.811 + SmilyHit(aInString, aInLength, bArg, 1.812 + ":)", 1.813 + "moz-smiley-s1", // smile 1.814 + outputHTML, glyphTextLen) || 1.815 + 1.816 + SmilyHit(aInString, aInLength, bArg, 1.817 + ":-D", 1.818 + "moz-smiley-s5", // laughing 1.819 + outputHTML, glyphTextLen) || 1.820 + 1.821 + SmilyHit(aInString, aInLength, bArg, 1.822 + ":-(", 1.823 + "moz-smiley-s2", // frown 1.824 + outputHTML, glyphTextLen) || 1.825 + 1.826 + SmilyHit(aInString, aInLength, bArg, 1.827 + ":(", 1.828 + "moz-smiley-s2", // frown 1.829 + outputHTML, glyphTextLen) || 1.830 + 1.831 + SmilyHit(aInString, aInLength, bArg, 1.832 + ":-[", 1.833 + "moz-smiley-s6", // embarassed 1.834 + outputHTML, glyphTextLen) || 1.835 + 1.836 + SmilyHit(aInString, aInLength, bArg, 1.837 + ";-)", 1.838 + "moz-smiley-s3", // wink 1.839 + outputHTML, glyphTextLen) || 1.840 + 1.841 + SmilyHit(aInString, aInLength, col0, 1.842 + ";)", 1.843 + "moz-smiley-s3", // wink 1.844 + outputHTML, glyphTextLen) || 1.845 + 1.846 + SmilyHit(aInString, aInLength, bArg, 1.847 + ":-\\", 1.848 + "moz-smiley-s7", // undecided 1.849 + outputHTML, glyphTextLen) || 1.850 + 1.851 + SmilyHit(aInString, aInLength, bArg, 1.852 + ":-P", 1.853 + "moz-smiley-s4", // tongue 1.854 + outputHTML, glyphTextLen) || 1.855 + 1.856 + SmilyHit(aInString, aInLength, bArg, 1.857 + ";-P", 1.858 + "moz-smiley-s4", // tongue 1.859 + outputHTML, glyphTextLen) || 1.860 + 1.861 + SmilyHit(aInString, aInLength, bArg, 1.862 + "=-O", 1.863 + "moz-smiley-s8", // surprise 1.864 + outputHTML, glyphTextLen) || 1.865 + 1.866 + SmilyHit(aInString, aInLength, bArg, 1.867 + ":-*", 1.868 + "moz-smiley-s9", // kiss 1.869 + outputHTML, glyphTextLen) || 1.870 + 1.871 + SmilyHit(aInString, aInLength, bArg, 1.872 + ">:o", 1.873 + "moz-smiley-s10", // yell 1.874 + outputHTML, glyphTextLen) || 1.875 + 1.876 + SmilyHit(aInString, aInLength, bArg, 1.877 + ">:-o", 1.878 + "moz-smiley-s10", // yell 1.879 + outputHTML, glyphTextLen) || 1.880 + 1.881 + SmilyHit(aInString, aInLength, bArg, 1.882 + "8-)", 1.883 + "moz-smiley-s11", // cool 1.884 + outputHTML, glyphTextLen) || 1.885 + 1.886 + SmilyHit(aInString, aInLength, bArg, 1.887 + ":-$", 1.888 + "moz-smiley-s12", // money 1.889 + outputHTML, glyphTextLen) || 1.890 + 1.891 + SmilyHit(aInString, aInLength, bArg, 1.892 + ":-!", 1.893 + "moz-smiley-s13", // foot 1.894 + outputHTML, glyphTextLen) || 1.895 + 1.896 + SmilyHit(aInString, aInLength, bArg, 1.897 + "O:-)", 1.898 + "moz-smiley-s14", // innocent 1.899 + outputHTML, glyphTextLen) || 1.900 + 1.901 + SmilyHit(aInString, aInLength, bArg, 1.902 + ":'(", 1.903 + "moz-smiley-s15", // cry 1.904 + outputHTML, glyphTextLen) || 1.905 + 1.906 + SmilyHit(aInString, aInLength, bArg, 1.907 + ":-X", 1.908 + "moz-smiley-s16", // sealed 1.909 + outputHTML, glyphTextLen) 1.910 + ) 1.911 + ) 1.912 + { 1.913 + aOutputString.Append(outputHTML); 1.914 + return true; 1.915 + } 1.916 + i++; 1.917 + } 1.918 + if (text0 == '\f') 1.919 + { 1.920 + aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>"); 1.921 + glyphTextLen = 1; 1.922 + return true; 1.923 + } 1.924 + if (text0 == '+' || text1 == '+') 1.925 + { 1.926 + if (ItMatchesDelimited(aInString, aInLength, 1.927 + MOZ_UTF16(" +/-"), 4, 1.928 + LT_IGNORE, LT_IGNORE)) 1.929 + { 1.930 + aOutputString.AppendLiteral(" ±"); 1.931 + glyphTextLen = 4; 1.932 + return true; 1.933 + } 1.934 + if (col0 && ItMatchesDelimited(aInString, aInLength, 1.935 + MOZ_UTF16("+/-"), 3, 1.936 + LT_IGNORE, LT_IGNORE)) 1.937 + { 1.938 + aOutputString.AppendLiteral("±"); 1.939 + glyphTextLen = 3; 1.940 + return true; 1.941 + } 1.942 + } 1.943 + 1.944 + // x^2 => x<sup>2</sup>, also handle powers x^-2, x^0.5 1.945 + // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/ 1.946 + if 1.947 + ( 1.948 + text1 == '^' 1.949 + && 1.950 + ( 1.951 + nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) || 1.952 + text0 == ')' || text0 == ']' || text0 == '}' 1.953 + ) 1.954 + && 1.955 + ( 1.956 + (2 < aInLength && nsCRT::IsAsciiDigit(aInString[2])) || 1.957 + (3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3])) 1.958 + ) 1.959 + ) 1.960 + { 1.961 + // Find first non-digit 1.962 + int32_t delimPos = 3; // skip "^" and first digit (or '-') 1.963 + for (; delimPos < aInLength 1.964 + && 1.965 + ( 1.966 + nsCRT::IsAsciiDigit(aInString[delimPos]) || 1.967 + (aInString[delimPos] == '.' && delimPos + 1 < aInLength && 1.968 + nsCRT::IsAsciiDigit(aInString[delimPos + 1])) 1.969 + ); 1.970 + delimPos++) 1.971 + ; 1.972 + 1.973 + if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos])) 1.974 + { 1.975 + return false; 1.976 + } 1.977 + 1.978 + outputHTML.Truncate(); 1.979 + outputHTML += text0; 1.980 + outputHTML.AppendLiteral( 1.981 + "<sup class=\"moz-txt-sup\">" 1.982 + "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">" 1.983 + "^</span>"); 1.984 + 1.985 + aOutputString.Append(outputHTML); 1.986 + aOutputString.Append(&aInString[2], delimPos - 2); 1.987 + aOutputString.AppendLiteral("</sup>"); 1.988 + 1.989 + glyphTextLen = delimPos /* - 1 + 1 */ ; 1.990 + return true; 1.991 + } 1.992 + /* 1.993 + The following strings are not substituted: 1.994 + |TXT |HTML |Reason 1.995 + +------+---------+---------- 1.996 + -> ← Bug #454 1.997 + => ⇐ dito 1.998 + <- → dito 1.999 + <= ⇒ dito 1.1000 + (tm) ™ dito 1.1001 + 1/4 ¼ is triggered by 1/4 Part 1, 2/4 Part 2, ... 1.1002 + 3/4 ¾ dito 1.1003 + 1/2 ½ similar 1.1004 + */ 1.1005 + return false; 1.1006 +} 1.1007 + 1.1008 +/*************************************************************************** 1.1009 + Library-internal Interface 1.1010 +****************************************************************************/ 1.1011 + 1.1012 +mozTXTToHTMLConv::mozTXTToHTMLConv() 1.1013 +{ 1.1014 +} 1.1015 + 1.1016 +mozTXTToHTMLConv::~mozTXTToHTMLConv() 1.1017 +{ 1.1018 +} 1.1019 + 1.1020 +NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, 1.1021 + mozITXTToHTMLConv, 1.1022 + nsIStreamConverter, 1.1023 + nsIStreamListener, 1.1024 + nsIRequestObserver) 1.1025 + 1.1026 +int32_t 1.1027 +mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, 1.1028 + uint32_t& logLineStart) 1.1029 +{ 1.1030 + int32_t result = 0; 1.1031 + int32_t lineLength = NS_strlen(line); 1.1032 + 1.1033 + bool moreCites = true; 1.1034 + while (moreCites) 1.1035 + { 1.1036 + /* E.g. the following lines count as quote: 1.1037 + 1.1038 + > text 1.1039 + //#ifdef QUOTE_RECOGNITION_AGGRESSIVE 1.1040 + >text 1.1041 + //#ifdef QUOTE_RECOGNITION_AGGRESSIVE 1.1042 + > text 1.1043 + ] text 1.1044 + USER> text 1.1045 + USER] text 1.1046 + //#endif 1.1047 + 1.1048 + logLineStart is the position of "t" in this example 1.1049 + */ 1.1050 + uint32_t i = logLineStart; 1.1051 + 1.1052 +#ifdef QUOTE_RECOGNITION_AGGRESSIVE 1.1053 + for (; int32_t(i) < lineLength && IsSpace(line[i]); i++) 1.1054 + ; 1.1055 + for (; int32_t(i) < lineLength && nsCRT::IsAsciiAlpha(line[i]) 1.1056 + && nsCRT::IsUpper(line[i]) ; i++) 1.1057 + ; 1.1058 + if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']')) 1.1059 +#else 1.1060 + if (int32_t(i) < lineLength && line[i] == '>') 1.1061 +#endif 1.1062 + { 1.1063 + i++; 1.1064 + if (int32_t(i) < lineLength && line[i] == ' ') 1.1065 + i++; 1.1066 + // sendmail/mbox 1.1067 + // Placed here for performance increase 1.1068 + const char16_t * indexString = &line[logLineStart]; 1.1069 + // here, |logLineStart < lineLength| is always true 1.1070 + uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString)); 1.1071 + if (Substring(indexString, 1.1072 + indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength), 1.1073 + nsCaseInsensitiveStringComparator())) 1.1074 + //XXX RFC2646 1.1075 + moreCites = false; 1.1076 + else 1.1077 + { 1.1078 + result++; 1.1079 + logLineStart = i; 1.1080 + } 1.1081 + } 1.1082 + else 1.1083 + moreCites = false; 1.1084 + } 1.1085 + 1.1086 + return result; 1.1087 +} 1.1088 + 1.1089 +void 1.1090 +mozTXTToHTMLConv::ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString) 1.1091 +{ 1.1092 + bool doURLs = 0 != (whattodo & kURLs); 1.1093 + bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution); 1.1094 + bool doStructPhrase = 0 != (whattodo & kStructPhrase); 1.1095 + 1.1096 + uint32_t structPhrase_strong = 0; // Number of currently open tags 1.1097 + uint32_t structPhrase_underline = 0; 1.1098 + uint32_t structPhrase_italic = 0; 1.1099 + uint32_t structPhrase_code = 0; 1.1100 + 1.1101 + nsAutoString outputHTML; // moved here for performance increase 1.1102 + 1.1103 + for(uint32_t i = 0; int32_t(i) < aInStringLength;) 1.1104 + { 1.1105 + if (doGlyphSubstitution) 1.1106 + { 1.1107 + int32_t glyphTextLen; 1.1108 + if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen)) 1.1109 + { 1.1110 + i += glyphTextLen; 1.1111 + continue; 1.1112 + } 1.1113 + } 1.1114 + 1.1115 + if (doStructPhrase) 1.1116 + { 1.1117 + const char16_t * newOffset = aInString; 1.1118 + int32_t newLength = aInStringLength; 1.1119 + if (i > 0 ) // skip the first element? 1.1120 + { 1.1121 + newOffset = &aInString[i-1]; 1.1122 + newLength = aInStringLength - i + 1; 1.1123 + } 1.1124 + 1.1125 + switch (aInString[i]) // Performance increase 1.1126 + { 1.1127 + case '*': 1.1128 + if (StructPhraseHit(newOffset, newLength, i == 0, 1.1129 + MOZ_UTF16("*"), 1, 1.1130 + "b", "class=\"moz-txt-star\"", 1.1131 + aOutString, structPhrase_strong)) 1.1132 + { 1.1133 + i++; 1.1134 + continue; 1.1135 + } 1.1136 + break; 1.1137 + case '/': 1.1138 + if (StructPhraseHit(newOffset, newLength, i == 0, 1.1139 + MOZ_UTF16("/"), 1, 1.1140 + "i", "class=\"moz-txt-slash\"", 1.1141 + aOutString, structPhrase_italic)) 1.1142 + { 1.1143 + i++; 1.1144 + continue; 1.1145 + } 1.1146 + break; 1.1147 + case '_': 1.1148 + if (StructPhraseHit(newOffset, newLength, i == 0, 1.1149 + MOZ_UTF16("_"), 1, 1.1150 + "span" /* <u> is deprecated */, 1.1151 + "class=\"moz-txt-underscore\"", 1.1152 + aOutString, structPhrase_underline)) 1.1153 + { 1.1154 + i++; 1.1155 + continue; 1.1156 + } 1.1157 + break; 1.1158 + case '|': 1.1159 + if (StructPhraseHit(newOffset, newLength, i == 0, 1.1160 + MOZ_UTF16("|"), 1, 1.1161 + "code", "class=\"moz-txt-verticalline\"", 1.1162 + aOutString, structPhrase_code)) 1.1163 + { 1.1164 + i++; 1.1165 + continue; 1.1166 + } 1.1167 + break; 1.1168 + } 1.1169 + } 1.1170 + 1.1171 + if (doURLs) 1.1172 + { 1.1173 + switch (aInString[i]) 1.1174 + { 1.1175 + case ':': 1.1176 + case '@': 1.1177 + case '.': 1.1178 + if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase 1.1179 + { 1.1180 + int32_t replaceBefore; 1.1181 + int32_t replaceAfter; 1.1182 + if (FindURL(aInString, aInStringLength, i, whattodo, 1.1183 + outputHTML, replaceBefore, replaceAfter) 1.1184 + && structPhrase_strong + structPhrase_italic + 1.1185 + structPhrase_underline + structPhrase_code == 0 1.1186 + /* workaround for bug #19445 */ ) 1.1187 + { 1.1188 + aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore); 1.1189 + aOutString += outputHTML; 1.1190 + i += replaceAfter + 1; 1.1191 + continue; 1.1192 + } 1.1193 + } 1.1194 + break; 1.1195 + } //switch 1.1196 + } 1.1197 + 1.1198 + switch (aInString[i]) 1.1199 + { 1.1200 + // Special symbols 1.1201 + case '<': 1.1202 + case '>': 1.1203 + case '&': 1.1204 + EscapeChar(aInString[i], aOutString, false); 1.1205 + i++; 1.1206 + break; 1.1207 + // Normal characters 1.1208 + default: 1.1209 + aOutString += aInString[i]; 1.1210 + i++; 1.1211 + break; 1.1212 + } 1.1213 + } 1.1214 +} 1.1215 + 1.1216 +void 1.1217 +mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString) 1.1218 +{ 1.1219 + // some common variables we were recalculating 1.1220 + // every time inside the for loop... 1.1221 + int32_t lengthOfInString = aInString.Length(); 1.1222 + const char16_t * uniBuffer = aInString.get(); 1.1223 + 1.1224 +#ifdef DEBUG_BenB_Perf 1.1225 + PRTime parsing_start = PR_IntervalNow(); 1.1226 +#endif 1.1227 + 1.1228 + // Look for simple entities not included in a tags and scan them. 1.1229 + /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>") 1.1230 + or in a tag ("<!--[...]-->"). 1.1231 + Unescape the rest (text between tags) and pass it to ScanTXT. */ 1.1232 + for (int32_t i = 0; i < lengthOfInString;) 1.1233 + { 1.1234 + if (aInString[i] == '<') // html tag 1.1235 + { 1.1236 + uint32_t start = uint32_t(i); 1.1237 + if (nsCRT::ToLower((char)aInString[uint32_t(i) + 1]) == 'a') 1.1238 + // if a tag, skip until </a> 1.1239 + { 1.1240 + i = aInString.Find("</a>", true, i); 1.1241 + if (i == kNotFound) 1.1242 + i = lengthOfInString; 1.1243 + else 1.1244 + i += 4; 1.1245 + } 1.1246 + else if (aInString[uint32_t(i) + 1] == '!' && aInString[uint32_t(i) + 2] == '-' && 1.1247 + aInString[uint32_t(i) + 3] == '-') 1.1248 + //if out-commended code, skip until --> 1.1249 + { 1.1250 + i = aInString.Find("-->", false, i); 1.1251 + if (i == kNotFound) 1.1252 + i = lengthOfInString; 1.1253 + else 1.1254 + i += 3; 1.1255 + 1.1256 + } 1.1257 + else // just skip tag (attributes etc.) 1.1258 + { 1.1259 + i = aInString.FindChar('>', i); 1.1260 + if (i == kNotFound) 1.1261 + i = lengthOfInString; 1.1262 + else 1.1263 + i++; 1.1264 + } 1.1265 + aOutString.Append(&uniBuffer[start], uint32_t(i) - start); 1.1266 + } 1.1267 + else 1.1268 + { 1.1269 + uint32_t start = uint32_t(i); 1.1270 + i = aInString.FindChar('<', i); 1.1271 + if (i == kNotFound) 1.1272 + i = lengthOfInString; 1.1273 + 1.1274 + nsString tempString; 1.1275 + tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate)); 1.1276 + UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString); 1.1277 + ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString); 1.1278 + } 1.1279 + } 1.1280 + 1.1281 +#ifdef DEBUG_BenB_Perf 1.1282 + printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start)); 1.1283 +#endif 1.1284 +} 1.1285 + 1.1286 +/**************************************************************************** 1.1287 + XPCOM Interface 1.1288 +*****************************************************************************/ 1.1289 + 1.1290 +NS_IMETHODIMP 1.1291 +mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream, 1.1292 + const char *aFromType, 1.1293 + const char *aToType, 1.1294 + nsISupports *aCtxt, nsIInputStream **_retval) 1.1295 +{ 1.1296 + return NS_ERROR_NOT_IMPLEMENTED; 1.1297 +} 1.1298 + 1.1299 +NS_IMETHODIMP 1.1300 +mozTXTToHTMLConv::AsyncConvertData(const char *aFromType, 1.1301 + const char *aToType, 1.1302 + nsIStreamListener *aListener, nsISupports *aCtxt) { 1.1303 + return NS_ERROR_NOT_IMPLEMENTED; 1.1304 +} 1.1305 + 1.1306 +NS_IMETHODIMP 1.1307 +mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt, 1.1308 + nsIInputStream *inStr, uint64_t sourceOffset, 1.1309 + uint32_t count) 1.1310 +{ 1.1311 + return NS_ERROR_NOT_IMPLEMENTED; 1.1312 +} 1.1313 + 1.1314 +NS_IMETHODIMP 1.1315 +mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt) 1.1316 +{ 1.1317 + return NS_ERROR_NOT_IMPLEMENTED; 1.1318 +} 1.1319 + 1.1320 +NS_IMETHODIMP 1.1321 +mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt, 1.1322 + nsresult aStatus) 1.1323 +{ 1.1324 + return NS_ERROR_NOT_IMPLEMENTED; 1.1325 +} 1.1326 + 1.1327 +NS_IMETHODIMP 1.1328 +mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, uint32_t *logLineStart, 1.1329 + uint32_t *_retval) 1.1330 +{ 1.1331 + if (!logLineStart || !_retval || !line) 1.1332 + return NS_ERROR_NULL_POINTER; 1.1333 + *_retval = CiteLevelTXT(line, *logLineStart); 1.1334 + return NS_OK; 1.1335 +} 1.1336 + 1.1337 +NS_IMETHODIMP 1.1338 +mozTXTToHTMLConv::ScanTXT(const char16_t *text, uint32_t whattodo, 1.1339 + char16_t **_retval) 1.1340 +{ 1.1341 + NS_ENSURE_ARG(text); 1.1342 + 1.1343 + // FIX ME!!! 1.1344 + nsString outString; 1.1345 + int32_t inLength = NS_strlen(text); 1.1346 + // by setting a large capacity up front, we save time 1.1347 + // when appending characters to the output string because we don't 1.1348 + // need to reallocate and re-copy the characters already in the out String. 1.1349 + NS_ASSERTION(inLength, "ScanTXT passed 0 length string"); 1.1350 + if (inLength == 0) { 1.1351 + *_retval = NS_strdup(text); 1.1352 + return NS_OK; 1.1353 + } 1.1354 + 1.1355 + outString.SetCapacity(uint32_t(inLength * growthRate)); 1.1356 + ScanTXT(text, inLength, whattodo, outString); 1.1357 + 1.1358 + *_retval = ToNewUnicode(outString); 1.1359 + return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; 1.1360 +} 1.1361 + 1.1362 +NS_IMETHODIMP 1.1363 +mozTXTToHTMLConv::ScanHTML(const char16_t *text, uint32_t whattodo, 1.1364 + char16_t **_retval) 1.1365 +{ 1.1366 + NS_ENSURE_ARG(text); 1.1367 + 1.1368 + // FIX ME!!! 1.1369 + nsString outString; 1.1370 + nsString inString (text); // look at this nasty extra copy of the entire input buffer! 1.1371 + outString.SetCapacity(uint32_t(inString.Length() * growthRate)); 1.1372 + 1.1373 + ScanHTML(inString, whattodo, outString); 1.1374 + *_retval = ToNewUnicode(outString); 1.1375 + return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; 1.1376 +} 1.1377 + 1.1378 +nsresult 1.1379 +MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv) 1.1380 +{ 1.1381 + NS_PRECONDITION(aConv != nullptr, "null ptr"); 1.1382 + if (!aConv) 1.1383 + return NS_ERROR_NULL_POINTER; 1.1384 + 1.1385 + *aConv = new mozTXTToHTMLConv(); 1.1386 + if (!*aConv) 1.1387 + return NS_ERROR_OUT_OF_MEMORY; 1.1388 + 1.1389 + NS_ADDREF(*aConv); 1.1390 + // return (*aConv)->Init(); 1.1391 + return NS_OK; 1.1392 +}