netwerk/streamconv/converters/mozTXTToHTMLConv.cpp

Thu, 15 Jan 2015 21:03:48 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 21:03:48 +0100
branch
TOR_BUG_9701
changeset 11
deefc01c0e14
permissions
-rw-r--r--

Integrate friendly tips from Tor colleagues to make (or not) 4.5 alpha 3;
This includes removal of overloaded (but unused) methods, and addition of
an overlooked call to DataStruct::SetData(nsISupports, uint32_t, bool).

michael@0 1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "mozTXTToHTMLConv.h"
michael@0 7 #include "nsNetUtil.h"
michael@0 8 #include "nsUnicharUtils.h"
michael@0 9 #include "nsCRT.h"
michael@0 10 #include "nsIExternalProtocolHandler.h"
michael@0 11 #include "nsIIOService.h"
michael@0 12
michael@0 13 #include <algorithm>
michael@0 14
michael@0 15 #ifdef DEBUG_BenB_Perf
michael@0 16 #include "prtime.h"
michael@0 17 #include "prinrval.h"
michael@0 18 #endif
michael@0 19
// NOTE(review): growthRate is not referenced anywhere in the visible part of
// this file; presumably it is a buffer growth factor used by code further
// down - confirm before changing it.
michael@0 20 const double growthRate = 1.2;
michael@0 21
michael@0 22 // Bug 183111, editor now replaces multiple spaces with leading
michael@0 23 // 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
michael@0 24 // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
michael@0 25 // Also recognize the Japanese ideographic space 0x3000 as a space.
michael@0 26 static inline bool IsSpace(const char16_t aChar)
michael@0 27 {
michael@0 28 return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
michael@0 29 }
michael@0 30
michael@0 31 // Escape Char will take ch, escape it and append the result to
michael@0 32 // aStringToAppendTo
michael@0 33 void
michael@0 34 mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo,
michael@0 35 bool inAttribute)
michael@0 36 {
michael@0 37 switch (ch)
michael@0 38 {
michael@0 39 case '<':
michael@0 40 aStringToAppendTo.AppendLiteral("&lt;");
michael@0 41 break;
michael@0 42 case '>':
michael@0 43 aStringToAppendTo.AppendLiteral("&gt;");
michael@0 44 break;
michael@0 45 case '&':
michael@0 46 aStringToAppendTo.AppendLiteral("&amp;");
michael@0 47 break;
michael@0 48 case '"':
michael@0 49 if (inAttribute)
michael@0 50 {
michael@0 51 aStringToAppendTo.AppendLiteral("&quot;");
michael@0 52 break;
michael@0 53 }
michael@0 54 // else fall through
michael@0 55 default:
michael@0 56 aStringToAppendTo += ch;
michael@0 57 }
michael@0 58
michael@0 59 return;
michael@0 60 }
michael@0 61
michael@0 62 // EscapeStr takes the passed in string and
michael@0 63 // escapes it IN PLACE.
michael@0 64 void
michael@0 65 mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute)
michael@0 66 {
michael@0 67 // the replace substring routines
michael@0 68 // don't seem to work if you have a character
michael@0 69 // in the in string that is also in the replacement
michael@0 70 // string! =(
michael@0 71 //aInString.ReplaceSubstring("&", "&amp;");
michael@0 72 //aInString.ReplaceSubstring("<", "&lt;");
michael@0 73 //aInString.ReplaceSubstring(">", "&gt;");
michael@0 74 for (uint32_t i = 0; i < aInString.Length();)
michael@0 75 {
michael@0 76 switch (aInString[i])
michael@0 77 {
michael@0 78 case '<':
michael@0 79 aInString.Cut(i, 1);
michael@0 80 aInString.Insert(NS_LITERAL_STRING("&lt;"), i);
michael@0 81 i += 4; // skip past the integers we just added
michael@0 82 break;
michael@0 83 case '>':
michael@0 84 aInString.Cut(i, 1);
michael@0 85 aInString.Insert(NS_LITERAL_STRING("&gt;"), i);
michael@0 86 i += 4; // skip past the integers we just added
michael@0 87 break;
michael@0 88 case '&':
michael@0 89 aInString.Cut(i, 1);
michael@0 90 aInString.Insert(NS_LITERAL_STRING("&amp;"), i);
michael@0 91 i += 5; // skip past the integers we just added
michael@0 92 break;
michael@0 93 case '"':
michael@0 94 if (inAttribute)
michael@0 95 {
michael@0 96 aInString.Cut(i, 1);
michael@0 97 aInString.Insert(NS_LITERAL_STRING("&quot;"), i);
michael@0 98 i += 6;
michael@0 99 break;
michael@0 100 }
michael@0 101 // else fall through
michael@0 102 default:
michael@0 103 i++;
michael@0 104 }
michael@0 105 }
michael@0 106 }
michael@0 107
michael@0 108 void
michael@0 109 mozTXTToHTMLConv::UnescapeStr(const char16_t * aInString, int32_t aStartPos, int32_t aLength, nsString& aOutString)
michael@0 110 {
michael@0 111 const char16_t * subString = nullptr;
michael@0 112 for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;)
michael@0 113 {
michael@0 114 int32_t remainingChars = i - aStartPos;
michael@0 115 if (aInString[i] == '&')
michael@0 116 {
michael@0 117 subString = &aInString[i];
michael@0 118 if (!nsCRT::strncmp(subString, MOZ_UTF16("&lt;"), std::min(4, aLength - remainingChars)))
michael@0 119 {
michael@0 120 aOutString.Append(char16_t('<'));
michael@0 121 i += 4;
michael@0 122 }
michael@0 123 else if (!nsCRT::strncmp(subString, MOZ_UTF16("&gt;"), std::min(4, aLength - remainingChars)))
michael@0 124 {
michael@0 125 aOutString.Append(char16_t('>'));
michael@0 126 i += 4;
michael@0 127 }
michael@0 128 else if (!nsCRT::strncmp(subString, MOZ_UTF16("&amp;"), std::min(5, aLength - remainingChars)))
michael@0 129 {
michael@0 130 aOutString.Append(char16_t('&'));
michael@0 131 i += 5;
michael@0 132 }
michael@0 133 else if (!nsCRT::strncmp(subString, MOZ_UTF16("&quot;"), std::min(6, aLength - remainingChars)))
michael@0 134 {
michael@0 135 aOutString.Append(char16_t('"'));
michael@0 136 i += 6;
michael@0 137 }
michael@0 138 else
michael@0 139 {
michael@0 140 aOutString += aInString[i];
michael@0 141 i++;
michael@0 142 }
michael@0 143 }
michael@0 144 else
michael@0 145 {
michael@0 146 aOutString += aInString[i];
michael@0 147 i++;
michael@0 148 }
michael@0 149 }
michael@0 150 }
michael@0 151
michael@0 152 void
michael@0 153 mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength,
michael@0 154 const uint32_t pos, nsString& aOutString)
michael@0 155 {
michael@0 156 NS_ASSERTION(int32_t(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851");
michael@0 157 if (int32_t(pos) >= aInLength)
michael@0 158 return;
michael@0 159
michael@0 160 if (aInString[pos] == '@')
michael@0 161 {
michael@0 162 // only pre-pend a mailto url if the string contains a .domain in it..
michael@0 163 //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
michael@0 164 nsDependentString inString(aInString, aInLength);
michael@0 165 if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign....
michael@0 166 {
michael@0 167 aOutString.AssignLiteral("mailto:");
michael@0 168 aOutString += aInString;
michael@0 169 }
michael@0 170 }
michael@0 171 else if (aInString[pos] == '.')
michael@0 172 {
michael@0 173 if (ItMatchesDelimited(aInString, aInLength,
michael@0 174 MOZ_UTF16("www."), 4, LT_IGNORE, LT_IGNORE))
michael@0 175 {
michael@0 176 aOutString.AssignLiteral("http://");
michael@0 177 aOutString += aInString;
michael@0 178 }
michael@0 179 else if (ItMatchesDelimited(aInString,aInLength, MOZ_UTF16("ftp."), 4, LT_IGNORE, LT_IGNORE))
michael@0 180 {
michael@0 181 aOutString.AssignLiteral("ftp://");
michael@0 182 aOutString += aInString;
michael@0 183 }
michael@0 184 }
michael@0 185 }
michael@0 186
michael@0 187 bool
michael@0 188 mozTXTToHTMLConv::FindURLStart(const char16_t * aInString, int32_t aInLength,
michael@0 189 const uint32_t pos, const modetype check,
michael@0 190 uint32_t& start)
michael@0 191 {
michael@0 192 switch(check)
michael@0 193 { // no breaks, because end of blocks is never reached
michael@0 194 case RFC1738:
michael@0 195 {
michael@0 196 if (!nsCRT::strncmp(&aInString[std::max(int32_t(pos - 4), 0)], MOZ_UTF16("<URL:"), 5))
michael@0 197 {
michael@0 198 start = pos + 1;
michael@0 199 return true;
michael@0 200 }
michael@0 201 else
michael@0 202 return false;
michael@0 203 }
michael@0 204 case RFC2396E:
michael@0 205 {
michael@0 206 nsString temp(aInString, aInLength);
michael@0 207 int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(MOZ_UTF16("<>\""), pos - 1);
michael@0 208 if (i != kNotFound && (temp[uint32_t(i)] == '<' ||
michael@0 209 temp[uint32_t(i)] == '"'))
michael@0 210 {
michael@0 211 start = uint32_t(++i);
michael@0 212 return start < pos;
michael@0 213 }
michael@0 214 else
michael@0 215 return false;
michael@0 216 }
michael@0 217 case freetext:
michael@0 218 {
michael@0 219 int32_t i = pos - 1;
michael@0 220 for (; i >= 0 && (
michael@0 221 nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) ||
michael@0 222 nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) ||
michael@0 223 aInString[uint32_t(i)] == '+' ||
michael@0 224 aInString[uint32_t(i)] == '-' ||
michael@0 225 aInString[uint32_t(i)] == '.'
michael@0 226 ); i--)
michael@0 227 ;
michael@0 228 if (++i >= 0 && uint32_t(i) < pos && nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]))
michael@0 229 {
michael@0 230 start = uint32_t(i);
michael@0 231 return true;
michael@0 232 }
michael@0 233 else
michael@0 234 return false;
michael@0 235 }
michael@0 236 case abbreviated:
michael@0 237 {
michael@0 238 int32_t i = pos - 1;
michael@0 239 // This disallows non-ascii-characters for email.
michael@0 240 // Currently correct, but revisit later after standards changed.
michael@0 241 bool isEmail = aInString[pos] == (char16_t)'@';
michael@0 242 // These chars mark the start of the URL
michael@0 243 for (; i >= 0
michael@0 244 && aInString[uint32_t(i)] != '>' && aInString[uint32_t(i)] != '<'
michael@0 245 && aInString[uint32_t(i)] != '"' && aInString[uint32_t(i)] != '\''
michael@0 246 && aInString[uint32_t(i)] != '`' && aInString[uint32_t(i)] != ','
michael@0 247 && aInString[uint32_t(i)] != '{' && aInString[uint32_t(i)] != '['
michael@0 248 && aInString[uint32_t(i)] != '(' && aInString[uint32_t(i)] != '|'
michael@0 249 && aInString[uint32_t(i)] != '\\'
michael@0 250 && !IsSpace(aInString[uint32_t(i)])
michael@0 251 && (!isEmail || nsCRT::IsAscii(aInString[uint32_t(i)]))
michael@0 252 ; i--)
michael@0 253 ;
michael@0 254 if
michael@0 255 (
michael@0 256 ++i >= 0 && uint32_t(i) < pos
michael@0 257 &&
michael@0 258 (
michael@0 259 nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) ||
michael@0 260 nsCRT::IsAsciiDigit(aInString[uint32_t(i)])
michael@0 261 )
michael@0 262 )
michael@0 263 {
michael@0 264 start = uint32_t(i);
michael@0 265 return true;
michael@0 266 }
michael@0 267 else
michael@0 268 return false;
michael@0 269 }
michael@0 270 default:
michael@0 271 return false;
michael@0 272 } //switch
michael@0 273 }
michael@0 274
michael@0 275 bool
michael@0 276 mozTXTToHTMLConv::FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
michael@0 277 const modetype check, const uint32_t start, uint32_t& end)
michael@0 278 {
michael@0 279 switch(check)
michael@0 280 { // no breaks, because end of blocks is never reached
michael@0 281 case RFC1738:
michael@0 282 case RFC2396E:
michael@0 283 {
michael@0 284 nsString temp(aInString, aInStringLength);
michael@0 285
michael@0 286 int32_t i = temp.FindCharInSet(MOZ_UTF16("<>\""), pos + 1);
michael@0 287 if (i != kNotFound && temp[uint32_t(i--)] ==
michael@0 288 (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"'))
michael@0 289 {
michael@0 290 end = uint32_t(i);
michael@0 291 return end > pos;
michael@0 292 }
michael@0 293 return false;
michael@0 294 }
michael@0 295 case freetext:
michael@0 296 case abbreviated:
michael@0 297 {
michael@0 298 uint32_t i = pos + 1;
michael@0 299 bool isEmail = aInString[pos] == (char16_t)'@';
michael@0 300 bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL
michael@0 301 bool seenOpeningSquareBracket = false; // there is a '[' earlier in the URL
michael@0 302 for (; int32_t(i) < aInStringLength; i++)
michael@0 303 {
michael@0 304 // These chars mark the end of the URL
michael@0 305 if (aInString[i] == '>' || aInString[i] == '<' ||
michael@0 306 aInString[i] == '"' || aInString[i] == '`' ||
michael@0 307 aInString[i] == '}' || aInString[i] == '{' ||
michael@0 308 aInString[i] == '|' ||
michael@0 309 (aInString[i] == ')' && !seenOpeningParenthesis) ||
michael@0 310 (aInString[i] == ']' && !seenOpeningSquareBracket) ||
michael@0 311 // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo.
michael@0 312 (aInString[i] == '[' && i > 2 &&
michael@0 313 (aInString[i - 1] != '/' || aInString[i - 2] != '/')) ||
michael@0 314 IsSpace(aInString[i]))
michael@0 315 break;
michael@0 316 // Disallow non-ascii-characters for email.
michael@0 317 // Currently correct, but revisit later after standards changed.
michael@0 318 if (isEmail && (
michael@0 319 aInString[i] == '(' || aInString[i] == '\'' ||
michael@0 320 !nsCRT::IsAscii(aInString[i])))
michael@0 321 break;
michael@0 322 if (aInString[i] == '(')
michael@0 323 seenOpeningParenthesis = true;
michael@0 324 if (aInString[i] == '[')
michael@0 325 seenOpeningSquareBracket = true;
michael@0 326 }
michael@0 327 // These chars are allowed in the middle of the URL, but not at end.
michael@0 328 // Technically they are, but are used in normal text after the URL.
michael@0 329 while (--i > pos && (
michael@0 330 aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' ||
michael@0 331 aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' ||
michael@0 332 aInString[i] == ':' || aInString[i] == '\''
michael@0 333 ))
michael@0 334 ;
michael@0 335 if (i > pos)
michael@0 336 {
michael@0 337 end = i;
michael@0 338 return true;
michael@0 339 }
michael@0 340 return false;
michael@0 341 }
michael@0 342 default:
michael@0 343 return false;
michael@0 344 } //switch
michael@0 345 }
michael@0 346
michael@0 347 void
michael@0 348 mozTXTToHTMLConv::CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength,
michael@0 349 const uint32_t pos, const uint32_t whathasbeendone,
michael@0 350 const modetype check, const uint32_t start, const uint32_t end,
michael@0 351 nsString& txtURL, nsString& desc,
michael@0 352 int32_t& replaceBefore, int32_t& replaceAfter)
michael@0 353 {
michael@0 354 uint32_t descstart = start;
michael@0 355 switch(check)
michael@0 356 {
michael@0 357 case RFC1738:
michael@0 358 {
michael@0 359 descstart = start - 5;
michael@0 360 desc.Append(&aInString[descstart], end - descstart + 2); // include "<URL:" and ">"
michael@0 361 replaceAfter = end - pos + 1;
michael@0 362 } break;
michael@0 363 case RFC2396E:
michael@0 364 {
michael@0 365 descstart = start - 1;
michael@0 366 desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
michael@0 367 replaceAfter = end - pos + 1;
michael@0 368 } break;
michael@0 369 case freetext:
michael@0 370 case abbreviated:
michael@0 371 {
michael@0 372 descstart = start;
michael@0 373 desc.Append(&aInString[descstart], end - start + 1); // don't include brackets
michael@0 374 replaceAfter = end - pos;
michael@0 375 } break;
michael@0 376 default: break;
michael@0 377 } //switch
michael@0 378
michael@0 379 EscapeStr(desc, false);
michael@0 380
michael@0 381 txtURL.Append(&aInString[start], end - start + 1);
michael@0 382 txtURL.StripWhitespace();
michael@0 383
michael@0 384 // FIX ME
michael@0 385 nsAutoString temp2;
michael@0 386 ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
michael@0 387 replaceBefore = temp2.Length();
michael@0 388 return;
michael@0 389 }
michael@0 390
michael@0 391 bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL)
michael@0 392 {
michael@0 393 if (!mIOService)
michael@0 394 return false;
michael@0 395
michael@0 396 nsAutoCString scheme;
michael@0 397 nsresult rv = mIOService->ExtractScheme(aURL, scheme);
michael@0 398 if(NS_FAILED(rv))
michael@0 399 return false;
michael@0 400
michael@0 401 // Get the handler for this scheme.
michael@0 402 nsCOMPtr<nsIProtocolHandler> handler;
michael@0 403 rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
michael@0 404 if(NS_FAILED(rv))
michael@0 405 return false;
michael@0 406
michael@0 407 // Is it an external protocol handler? If not, linkify it.
michael@0 408 nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler);
michael@0 409 if (!externalHandler)
michael@0 410 return true; // handler is built-in, linkify it!
michael@0 411
michael@0 412 // If external app exists for the scheme then linkify it.
michael@0 413 bool exists;
michael@0 414 rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
michael@0 415 return(NS_SUCCEEDED(rv) && exists);
michael@0 416 }
michael@0 417
michael@0 418 bool
michael@0 419 mozTXTToHTMLConv::CheckURLAndCreateHTML(
michael@0 420 const nsString& txtURL, const nsString& desc, const modetype mode,
michael@0 421 nsString& outputHTML)
michael@0 422 {
michael@0 423 // Create *uri from txtURL
michael@0 424 nsCOMPtr<nsIURI> uri;
michael@0 425 nsresult rv;
michael@0 426 // Lazily initialize mIOService
michael@0 427 if (!mIOService)
michael@0 428 {
michael@0 429 mIOService = do_GetIOService();
michael@0 430
michael@0 431 if (!mIOService)
michael@0 432 return false;
michael@0 433 }
michael@0 434
michael@0 435 // See if the url should be linkified.
michael@0 436 NS_ConvertUTF16toUTF8 utf8URL(txtURL);
michael@0 437 if (!ShouldLinkify(utf8URL))
michael@0 438 return false;
michael@0 439
michael@0 440 // it would be faster if we could just check to see if there is a protocol
michael@0 441 // handler for the url and return instead of actually trying to create a url...
michael@0 442 rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri));
michael@0 443
michael@0 444 // Real work
michael@0 445 if (NS_SUCCEEDED(rv) && uri)
michael@0 446 {
michael@0 447 outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
michael@0 448 switch(mode)
michael@0 449 {
michael@0 450 case RFC1738:
michael@0 451 outputHTML.AppendLiteral("rfc1738");
michael@0 452 break;
michael@0 453 case RFC2396E:
michael@0 454 outputHTML.AppendLiteral("rfc2396E");
michael@0 455 break;
michael@0 456 case freetext:
michael@0 457 outputHTML.AppendLiteral("freetext");
michael@0 458 break;
michael@0 459 case abbreviated:
michael@0 460 outputHTML.AppendLiteral("abbreviated");
michael@0 461 break;
michael@0 462 default: break;
michael@0 463 }
michael@0 464 nsAutoString escapedURL(txtURL);
michael@0 465 EscapeStr(escapedURL, true);
michael@0 466
michael@0 467 outputHTML.AppendLiteral("\" href=\"");
michael@0 468 outputHTML += escapedURL;
michael@0 469 outputHTML.AppendLiteral("\">");
michael@0 470 outputHTML += desc;
michael@0 471 outputHTML.AppendLiteral("</a>");
michael@0 472 return true;
michael@0 473 }
michael@0 474 else
michael@0 475 return false;
michael@0 476 }
michael@0 477
michael@0 478 NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t * aInString, int32_t aInLength, int32_t aPos, int32_t * aStartPos, int32_t * aEndPos)
michael@0 479 {
michael@0 480 // call FindURL on the passed in string
michael@0 481 nsAutoString outputHTML; // we'll ignore the generated output HTML
michael@0 482
michael@0 483 *aStartPos = -1;
michael@0 484 *aEndPos = -1;
michael@0 485
michael@0 486 FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
michael@0 487
michael@0 488 return NS_OK;
michael@0 489 }
michael@0 490
michael@0 491 bool
michael@0 492 mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
michael@0 493 const uint32_t whathasbeendone,
michael@0 494 nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter)
michael@0 495 {
michael@0 496 enum statetype {unchecked, invalid, startok, endok, success};
michael@0 497 static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
michael@0 498
michael@0 499 statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
michael@0 500 /* I don't like this abuse of enums as index for the array,
michael@0 501 but I don't know a better method */
michael@0 502
michael@0 503 // Define, which modes to check
michael@0 504 /* all modes but abbreviated are checked for text[pos] == ':',
michael@0 505 only abbreviated for '.', RFC2396E and abbreviated for '@' */
michael@0 506 for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
michael@0 507 iState = modetype(iState + 1))
michael@0 508 state[iState] = aInString[pos] == ':' ? unchecked : invalid;
michael@0 509 switch (aInString[pos])
michael@0 510 {
michael@0 511 case '@':
michael@0 512 state[RFC2396E] = unchecked;
michael@0 513 // no break here
michael@0 514 case '.':
michael@0 515 state[abbreviated] = unchecked;
michael@0 516 break;
michael@0 517 case ':':
michael@0 518 state[abbreviated] = invalid;
michael@0 519 break;
michael@0 520 default:
michael@0 521 break;
michael@0 522 }
michael@0 523
michael@0 524 // Test, first successful mode wins, sequence defined by |ranking|
michael@0 525 int32_t iCheck = 0; // the currently tested modetype
michael@0 526 modetype check = ranking[iCheck];
michael@0 527 for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
michael@0 528 iCheck++)
michael@0 529 /* check state from last run.
michael@0 530 If this is the first, check this one, which isn't = success yet */
michael@0 531 {
michael@0 532 check = ranking[iCheck];
michael@0 533
michael@0 534 uint32_t start, end;
michael@0 535
michael@0 536 if (state[check] == unchecked)
michael@0 537 if (FindURLStart(aInString, aInLength, pos, check, start))
michael@0 538 state[check] = startok;
michael@0 539
michael@0 540 if (state[check] == startok)
michael@0 541 if (FindURLEnd(aInString, aInLength, pos, check, start, end))
michael@0 542 state[check] = endok;
michael@0 543
michael@0 544 if (state[check] == endok)
michael@0 545 {
michael@0 546 nsAutoString txtURL, desc;
michael@0 547 int32_t resultReplaceBefore, resultReplaceAfter;
michael@0 548
michael@0 549 CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end,
michael@0 550 txtURL, desc,
michael@0 551 resultReplaceBefore, resultReplaceAfter);
michael@0 552
michael@0 553 if (aInString[pos] != ':')
michael@0 554 {
michael@0 555 nsAutoString temp = txtURL;
michael@0 556 txtURL.SetLength(0);
michael@0 557 CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL);
michael@0 558 }
michael@0 559
michael@0 560 if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check,
michael@0 561 outputHTML))
michael@0 562 {
michael@0 563 replaceBefore = resultReplaceBefore;
michael@0 564 replaceAfter = resultReplaceAfter;
michael@0 565 state[check] = success;
michael@0 566 }
michael@0 567 } // if
michael@0 568 } // for
michael@0 569 return state[check] == success;
michael@0 570 }
michael@0 571
michael@0 572 bool
michael@0 573 mozTXTToHTMLConv::ItMatchesDelimited(const char16_t * aInString,
michael@0 574 int32_t aInLength, const char16_t* rep, int32_t aRepLen,
michael@0 575 LIMTYPE before, LIMTYPE after)
michael@0 576 {
michael@0 577
michael@0 578 // this little method gets called a LOT. I found we were spending a
michael@0 579 // lot of time just calculating the length of the variable "rep"
michael@0 580 // over and over again every time we called it. So we're now passing
michael@0 581 // an integer in here.
michael@0 582 int32_t textLen = aInLength;
michael@0 583
michael@0 584 if
michael@0 585 (
michael@0 586 ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER))
michael@0 587 && textLen < aRepLen) ||
michael@0 588 ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER))
michael@0 589 && textLen < aRepLen + 1) ||
michael@0 590 (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER
michael@0 591 && textLen < aRepLen + 2)
michael@0 592 )
michael@0 593 return false;
michael@0 594
michael@0 595 char16_t text0 = aInString[0];
michael@0 596 char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
michael@0 597
michael@0 598 if
michael@0 599 (
michael@0 600 (before == LT_ALPHA
michael@0 601 && !nsCRT::IsAsciiAlpha(text0)) ||
michael@0 602 (before == LT_DIGIT
michael@0 603 && !nsCRT::IsAsciiDigit(text0)) ||
michael@0 604 (before == LT_DELIMITER
michael@0 605 &&
michael@0 606 (
michael@0 607 nsCRT::IsAsciiAlpha(text0) ||
michael@0 608 nsCRT::IsAsciiDigit(text0) ||
michael@0 609 text0 == *rep
michael@0 610 )) ||
michael@0 611 (after == LT_ALPHA
michael@0 612 && !nsCRT::IsAsciiAlpha(textAfterPos)) ||
michael@0 613 (after == LT_DIGIT
michael@0 614 && !nsCRT::IsAsciiDigit(textAfterPos)) ||
michael@0 615 (after == LT_DELIMITER
michael@0 616 &&
michael@0 617 (
michael@0 618 nsCRT::IsAsciiAlpha(textAfterPos) ||
michael@0 619 nsCRT::IsAsciiDigit(textAfterPos) ||
michael@0 620 textAfterPos == *rep
michael@0 621 )) ||
michael@0 622 !Substring(Substring(aInString, aInString+aInLength),
michael@0 623 (before == LT_IGNORE ? 0 : 1),
michael@0 624 aRepLen).Equals(Substring(rep, rep+aRepLen),
michael@0 625 nsCaseInsensitiveStringComparator())
michael@0 626 )
michael@0 627 return false;
michael@0 628
michael@0 629 return true;
michael@0 630 }
michael@0 631
michael@0 632 uint32_t
michael@0 633 mozTXTToHTMLConv::NumberOfMatches(const char16_t * aInString, int32_t aInStringLength,
michael@0 634 const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after)
michael@0 635 {
michael@0 636 uint32_t result = 0;
michael@0 637
michael@0 638 for (int32_t i = 0; i < aInStringLength; i++)
michael@0 639 {
michael@0 640 const char16_t * indexIntoString = &aInString[i];
michael@0 641 if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after))
michael@0 642 result++;
michael@0 643 }
michael@0 644 return result;
michael@0 645 }
michael@0 646
michael@0 647
michael@0 648 // NOTE: the converted html for the phrase is appended to aOutString
michael@0 649 // tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
michael@0 650 bool
michael@0 651 mozTXTToHTMLConv::StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
michael@0 652 const char16_t* tagTXT, int32_t aTagTXTLen,
michael@0 653 const char* tagHTML, const char* attributeHTML,
michael@0 654 nsString& aOutString, uint32_t& openTags)
michael@0 655 {
michael@0 656 /* We're searching for the following pattern:
michael@0 657 LT_DELIMITER - "*" - ALPHA -
michael@0 658 [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
michael@0 659 <strong> is only inserted, if existence of a pair could be verified
michael@0 660 We use the first opening/closing tag, if we can choose */
michael@0 661
michael@0 662 const char16_t * newOffset = aInString;
michael@0 663 int32_t newLength = aInStringLength;
michael@0 664 if (!col0) // skip the first element?
michael@0 665 {
michael@0 666 newOffset = &aInString[1];
michael@0 667 newLength = aInStringLength - 1;
michael@0 668 }
michael@0 669
michael@0 670 // opening tag
michael@0 671 if
michael@0 672 (
michael@0 673 ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen,
michael@0 674 (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag
michael@0 675 && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen,
michael@0 676 LT_ALPHA, LT_DELIMITER) // remaining closing tags
michael@0 677 > openTags
michael@0 678 )
michael@0 679 {
michael@0 680 openTags++;
michael@0 681 aOutString.AppendLiteral("<");
michael@0 682 aOutString.AppendASCII(tagHTML);
michael@0 683 aOutString.Append(char16_t(' '));
michael@0 684 aOutString.AppendASCII(attributeHTML);
michael@0 685 aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
michael@0 686 aOutString.Append(tagTXT);
michael@0 687 aOutString.AppendLiteral("</span>");
michael@0 688 return true;
michael@0 689 }
michael@0 690
michael@0 691 // closing tag
michael@0 692 else if (openTags > 0
michael@0 693 && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER))
michael@0 694 {
michael@0 695 openTags--;
michael@0 696 aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
michael@0 697 aOutString.Append(tagTXT);
michael@0 698 aOutString.AppendLiteral("</span></");
michael@0 699 aOutString.AppendASCII(tagHTML);
michael@0 700 aOutString.Append(char16_t('>'));
michael@0 701 return true;
michael@0 702 }
michael@0 703
michael@0 704 return false;
michael@0 705 }
michael@0 706
michael@0 707
michael@0 708 bool
michael@0 709 mozTXTToHTMLConv::SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
michael@0 710 const char* tagTXT, const char* imageName,
michael@0 711 nsString& outputHTML, int32_t& glyphTextLen)
michael@0 712 {
michael@0 713 if ( !aInString || !tagTXT || !imageName )
michael@0 714 return false;
michael@0 715
michael@0 716 int32_t tagLen = strlen(tagTXT);
michael@0 717
michael@0 718 uint32_t delim = (col0 ? 0 : 1) + tagLen;
michael@0 719
michael@0 720 if
michael@0 721 (
michael@0 722 (col0 || IsSpace(aInString[0]))
michael@0 723 &&
michael@0 724 (
michael@0 725 aLength <= int32_t(delim) ||
michael@0 726 IsSpace(aInString[delim]) ||
michael@0 727 (aLength > int32_t(delim + 1)
michael@0 728 &&
michael@0 729 (
michael@0 730 aInString[delim] == '.' ||
michael@0 731 aInString[delim] == ',' ||
michael@0 732 aInString[delim] == ';' ||
michael@0 733 aInString[delim] == '8' ||
michael@0 734 aInString[delim] == '>' ||
michael@0 735 aInString[delim] == '!' ||
michael@0 736 aInString[delim] == '?'
michael@0 737 )
michael@0 738 && IsSpace(aInString[delim + 1]))
michael@0 739 )
michael@0 740 && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen,
michael@0 741 col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
michael@0 742 // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
michael@0 743 )
michael@0 744 {
michael@0 745 if (!col0)
michael@0 746 {
michael@0 747 outputHTML.Truncate();
michael@0 748 outputHTML.Append(char16_t(' '));
michael@0 749 }
michael@0 750
michael@0 751 outputHTML.AppendLiteral("<span class=\""); // <span class="
michael@0 752 AppendASCIItoUTF16(imageName, outputHTML); // e.g. smiley-frown
michael@0 753 outputHTML.AppendLiteral("\" title=\""); // " title="
michael@0 754 AppendASCIItoUTF16(tagTXT, outputHTML); // smiley tooltip
michael@0 755 outputHTML.AppendLiteral("\"><span>"); // "><span>
michael@0 756 AppendASCIItoUTF16(tagTXT, outputHTML); // original text
michael@0 757 outputHTML.AppendLiteral("</span></span>"); // </span></span>
michael@0 758 glyphTextLen = (col0 ? 0 : 1) + tagLen;
michael@0 759 return true;
michael@0 760 }
michael@0 761
michael@0 762 return false;
michael@0 763 }
michael@0 764
michael@0 765 // the glyph is appended to aOutputString instead of the original string...
michael@0 766 bool
michael@0 767 mozTXTToHTMLConv::GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
michael@0 768 nsString& aOutputString, int32_t& glyphTextLen)
michael@0 769 {
michael@0 770 char16_t text0 = aInString[0];
michael@0 771 char16_t text1 = aInString[1];
michael@0 772 char16_t firstChar = (col0 ? text0 : text1);
michael@0 773
michael@0 774 // temporary variable used to store the glyph html text
michael@0 775 nsAutoString outputHTML;
michael@0 776 bool bTestSmilie;
michael@0 777 bool bArg = false;
michael@0 778 int i;
michael@0 779
michael@0 780 // refactor some of this mess to avoid code duplication and speed execution a bit
michael@0 781 // there are two cases that need to be tried one after another. To avoid a lot of
michael@0 782 // duplicate code, rolling into a loop
michael@0 783
michael@0 784 i = 0;
michael@0 785 while ( i < 2 )
michael@0 786 {
michael@0 787 bTestSmilie = false;
michael@0 788 if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O'))
michael@0 789 {
michael@0 790 // first test passed
michael@0 791
michael@0 792 bTestSmilie = true;
michael@0 793 bArg = col0;
michael@0 794 }
michael@0 795 if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) )
michael@0 796 {
michael@0 797 // second test passed
michael@0 798
michael@0 799 bTestSmilie = true;
michael@0 800 bArg = false;
michael@0 801 }
michael@0 802 if ( bTestSmilie && (
michael@0 803 SmilyHit(aInString, aInLength, bArg,
michael@0 804 ":-)",
michael@0 805 "moz-smiley-s1", // smile
michael@0 806 outputHTML, glyphTextLen) ||
michael@0 807
michael@0 808 SmilyHit(aInString, aInLength, bArg,
michael@0 809 ":)",
michael@0 810 "moz-smiley-s1", // smile
michael@0 811 outputHTML, glyphTextLen) ||
michael@0 812
michael@0 813 SmilyHit(aInString, aInLength, bArg,
michael@0 814 ":-D",
michael@0 815 "moz-smiley-s5", // laughing
michael@0 816 outputHTML, glyphTextLen) ||
michael@0 817
michael@0 818 SmilyHit(aInString, aInLength, bArg,
michael@0 819 ":-(",
michael@0 820 "moz-smiley-s2", // frown
michael@0 821 outputHTML, glyphTextLen) ||
michael@0 822
michael@0 823 SmilyHit(aInString, aInLength, bArg,
michael@0 824 ":(",
michael@0 825 "moz-smiley-s2", // frown
michael@0 826 outputHTML, glyphTextLen) ||
michael@0 827
michael@0 828 SmilyHit(aInString, aInLength, bArg,
michael@0 829 ":-[",
michael@0 830 "moz-smiley-s6", // embarassed
michael@0 831 outputHTML, glyphTextLen) ||
michael@0 832
michael@0 833 SmilyHit(aInString, aInLength, bArg,
michael@0 834 ";-)",
michael@0 835 "moz-smiley-s3", // wink
michael@0 836 outputHTML, glyphTextLen) ||
michael@0 837
michael@0 838 SmilyHit(aInString, aInLength, col0,
michael@0 839 ";)",
michael@0 840 "moz-smiley-s3", // wink
michael@0 841 outputHTML, glyphTextLen) ||
michael@0 842
michael@0 843 SmilyHit(aInString, aInLength, bArg,
michael@0 844 ":-\\",
michael@0 845 "moz-smiley-s7", // undecided
michael@0 846 outputHTML, glyphTextLen) ||
michael@0 847
michael@0 848 SmilyHit(aInString, aInLength, bArg,
michael@0 849 ":-P",
michael@0 850 "moz-smiley-s4", // tongue
michael@0 851 outputHTML, glyphTextLen) ||
michael@0 852
michael@0 853 SmilyHit(aInString, aInLength, bArg,
michael@0 854 ";-P",
michael@0 855 "moz-smiley-s4", // tongue
michael@0 856 outputHTML, glyphTextLen) ||
michael@0 857
michael@0 858 SmilyHit(aInString, aInLength, bArg,
michael@0 859 "=-O",
michael@0 860 "moz-smiley-s8", // surprise
michael@0 861 outputHTML, glyphTextLen) ||
michael@0 862
michael@0 863 SmilyHit(aInString, aInLength, bArg,
michael@0 864 ":-*",
michael@0 865 "moz-smiley-s9", // kiss
michael@0 866 outputHTML, glyphTextLen) ||
michael@0 867
michael@0 868 SmilyHit(aInString, aInLength, bArg,
michael@0 869 ">:o",
michael@0 870 "moz-smiley-s10", // yell
michael@0 871 outputHTML, glyphTextLen) ||
michael@0 872
michael@0 873 SmilyHit(aInString, aInLength, bArg,
michael@0 874 ">:-o",
michael@0 875 "moz-smiley-s10", // yell
michael@0 876 outputHTML, glyphTextLen) ||
michael@0 877
michael@0 878 SmilyHit(aInString, aInLength, bArg,
michael@0 879 "8-)",
michael@0 880 "moz-smiley-s11", // cool
michael@0 881 outputHTML, glyphTextLen) ||
michael@0 882
michael@0 883 SmilyHit(aInString, aInLength, bArg,
michael@0 884 ":-$",
michael@0 885 "moz-smiley-s12", // money
michael@0 886 outputHTML, glyphTextLen) ||
michael@0 887
michael@0 888 SmilyHit(aInString, aInLength, bArg,
michael@0 889 ":-!",
michael@0 890 "moz-smiley-s13", // foot
michael@0 891 outputHTML, glyphTextLen) ||
michael@0 892
michael@0 893 SmilyHit(aInString, aInLength, bArg,
michael@0 894 "O:-)",
michael@0 895 "moz-smiley-s14", // innocent
michael@0 896 outputHTML, glyphTextLen) ||
michael@0 897
michael@0 898 SmilyHit(aInString, aInLength, bArg,
michael@0 899 ":'(",
michael@0 900 "moz-smiley-s15", // cry
michael@0 901 outputHTML, glyphTextLen) ||
michael@0 902
michael@0 903 SmilyHit(aInString, aInLength, bArg,
michael@0 904 ":-X",
michael@0 905 "moz-smiley-s16", // sealed
michael@0 906 outputHTML, glyphTextLen)
michael@0 907 )
michael@0 908 )
michael@0 909 {
michael@0 910 aOutputString.Append(outputHTML);
michael@0 911 return true;
michael@0 912 }
michael@0 913 i++;
michael@0 914 }
michael@0 915 if (text0 == '\f')
michael@0 916 {
michael@0 917 aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
michael@0 918 glyphTextLen = 1;
michael@0 919 return true;
michael@0 920 }
michael@0 921 if (text0 == '+' || text1 == '+')
michael@0 922 {
michael@0 923 if (ItMatchesDelimited(aInString, aInLength,
michael@0 924 MOZ_UTF16(" +/-"), 4,
michael@0 925 LT_IGNORE, LT_IGNORE))
michael@0 926 {
michael@0 927 aOutputString.AppendLiteral(" &plusmn;");
michael@0 928 glyphTextLen = 4;
michael@0 929 return true;
michael@0 930 }
michael@0 931 if (col0 && ItMatchesDelimited(aInString, aInLength,
michael@0 932 MOZ_UTF16("+/-"), 3,
michael@0 933 LT_IGNORE, LT_IGNORE))
michael@0 934 {
michael@0 935 aOutputString.AppendLiteral("&plusmn;");
michael@0 936 glyphTextLen = 3;
michael@0 937 return true;
michael@0 938 }
michael@0 939 }
michael@0 940
michael@0 941 // x^2 => x<sup>2</sup>, also handle powers x^-2, x^0.5
michael@0 942 // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
michael@0 943 if
michael@0 944 (
michael@0 945 text1 == '^'
michael@0 946 &&
michael@0 947 (
michael@0 948 nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) ||
michael@0 949 text0 == ')' || text0 == ']' || text0 == '}'
michael@0 950 )
michael@0 951 &&
michael@0 952 (
michael@0 953 (2 < aInLength && nsCRT::IsAsciiDigit(aInString[2])) ||
michael@0 954 (3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3]))
michael@0 955 )
michael@0 956 )
michael@0 957 {
michael@0 958 // Find first non-digit
michael@0 959 int32_t delimPos = 3; // skip "^" and first digit (or '-')
michael@0 960 for (; delimPos < aInLength
michael@0 961 &&
michael@0 962 (
michael@0 963 nsCRT::IsAsciiDigit(aInString[delimPos]) ||
michael@0 964 (aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
michael@0 965 nsCRT::IsAsciiDigit(aInString[delimPos + 1]))
michael@0 966 );
michael@0 967 delimPos++)
michael@0 968 ;
michael@0 969
michael@0 970 if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos]))
michael@0 971 {
michael@0 972 return false;
michael@0 973 }
michael@0 974
michael@0 975 outputHTML.Truncate();
michael@0 976 outputHTML += text0;
michael@0 977 outputHTML.AppendLiteral(
michael@0 978 "<sup class=\"moz-txt-sup\">"
michael@0 979 "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">"
michael@0 980 "^</span>");
michael@0 981
michael@0 982 aOutputString.Append(outputHTML);
michael@0 983 aOutputString.Append(&aInString[2], delimPos - 2);
michael@0 984 aOutputString.AppendLiteral("</sup>");
michael@0 985
michael@0 986 glyphTextLen = delimPos /* - 1 + 1 */ ;
michael@0 987 return true;
michael@0 988 }
michael@0 989 /*
michael@0 990 The following strings are not substituted:
michael@0 991 |TXT |HTML |Reason
michael@0 992 +------+---------+----------
michael@0 993 -> &larr; Bug #454
michael@0 994 => &lArr; dito
michael@0 995 <- &rarr; dito
michael@0 996 <= &rArr; dito
michael@0 997 (tm) &trade; dito
michael@0 998 1/4 &frac14; is triggered by 1/4 Part 1, 2/4 Part 2, ...
michael@0 999 3/4 &frac34; dito
michael@0 1000 1/2 &frac12; similar
michael@0 1001 */
michael@0 1002 return false;
michael@0 1003 }
michael@0 1004
michael@0 1005 /***************************************************************************
michael@0 1006 Library-internal Interface
michael@0 1007 ****************************************************************************/
michael@0 1008
michael@0 1009 mozTXTToHTMLConv::mozTXTToHTMLConv()
michael@0 1010 {
michael@0 1011 }
michael@0 1012
michael@0 1013 mozTXTToHTMLConv::~mozTXTToHTMLConv()
michael@0 1014 {
michael@0 1015 }
michael@0 1016
michael@0 1017 NS_IMPL_ISUPPORTS(mozTXTToHTMLConv,
michael@0 1018 mozITXTToHTMLConv,
michael@0 1019 nsIStreamConverter,
michael@0 1020 nsIStreamListener,
michael@0 1021 nsIRequestObserver)
michael@0 1022
michael@0 1023 int32_t
michael@0 1024 mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line,
michael@0 1025 uint32_t& logLineStart)
michael@0 1026 {
michael@0 1027 int32_t result = 0;
michael@0 1028 int32_t lineLength = NS_strlen(line);
michael@0 1029
michael@0 1030 bool moreCites = true;
michael@0 1031 while (moreCites)
michael@0 1032 {
michael@0 1033 /* E.g. the following lines count as quote:
michael@0 1034
michael@0 1035 > text
michael@0 1036 //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
michael@0 1037 >text
michael@0 1038 //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
michael@0 1039 > text
michael@0 1040 ] text
michael@0 1041 USER> text
michael@0 1042 USER] text
michael@0 1043 //#endif
michael@0 1044
michael@0 1045 logLineStart is the position of "t" in this example
michael@0 1046 */
michael@0 1047 uint32_t i = logLineStart;
michael@0 1048
michael@0 1049 #ifdef QUOTE_RECOGNITION_AGGRESSIVE
michael@0 1050 for (; int32_t(i) < lineLength && IsSpace(line[i]); i++)
michael@0 1051 ;
michael@0 1052 for (; int32_t(i) < lineLength && nsCRT::IsAsciiAlpha(line[i])
michael@0 1053 && nsCRT::IsUpper(line[i]) ; i++)
michael@0 1054 ;
michael@0 1055 if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']'))
michael@0 1056 #else
michael@0 1057 if (int32_t(i) < lineLength && line[i] == '>')
michael@0 1058 #endif
michael@0 1059 {
michael@0 1060 i++;
michael@0 1061 if (int32_t(i) < lineLength && line[i] == ' ')
michael@0 1062 i++;
michael@0 1063 // sendmail/mbox
michael@0 1064 // Placed here for performance increase
michael@0 1065 const char16_t * indexString = &line[logLineStart];
michael@0 1066 // here, |logLineStart < lineLength| is always true
michael@0 1067 uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString));
michael@0 1068 if (Substring(indexString,
michael@0 1069 indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength),
michael@0 1070 nsCaseInsensitiveStringComparator()))
michael@0 1071 //XXX RFC2646
michael@0 1072 moreCites = false;
michael@0 1073 else
michael@0 1074 {
michael@0 1075 result++;
michael@0 1076 logLineStart = i;
michael@0 1077 }
michael@0 1078 }
michael@0 1079 else
michael@0 1080 moreCites = false;
michael@0 1081 }
michael@0 1082
michael@0 1083 return result;
michael@0 1084 }
michael@0 1085
michael@0 1086 void
michael@0 1087 mozTXTToHTMLConv::ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString)
michael@0 1088 {
michael@0 1089 bool doURLs = 0 != (whattodo & kURLs);
michael@0 1090 bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
michael@0 1091 bool doStructPhrase = 0 != (whattodo & kStructPhrase);
michael@0 1092
michael@0 1093 uint32_t structPhrase_strong = 0; // Number of currently open tags
michael@0 1094 uint32_t structPhrase_underline = 0;
michael@0 1095 uint32_t structPhrase_italic = 0;
michael@0 1096 uint32_t structPhrase_code = 0;
michael@0 1097
michael@0 1098 nsAutoString outputHTML; // moved here for performance increase
michael@0 1099
michael@0 1100 for(uint32_t i = 0; int32_t(i) < aInStringLength;)
michael@0 1101 {
michael@0 1102 if (doGlyphSubstitution)
michael@0 1103 {
michael@0 1104 int32_t glyphTextLen;
michael@0 1105 if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen))
michael@0 1106 {
michael@0 1107 i += glyphTextLen;
michael@0 1108 continue;
michael@0 1109 }
michael@0 1110 }
michael@0 1111
michael@0 1112 if (doStructPhrase)
michael@0 1113 {
michael@0 1114 const char16_t * newOffset = aInString;
michael@0 1115 int32_t newLength = aInStringLength;
michael@0 1116 if (i > 0 ) // skip the first element?
michael@0 1117 {
michael@0 1118 newOffset = &aInString[i-1];
michael@0 1119 newLength = aInStringLength - i + 1;
michael@0 1120 }
michael@0 1121
michael@0 1122 switch (aInString[i]) // Performance increase
michael@0 1123 {
michael@0 1124 case '*':
michael@0 1125 if (StructPhraseHit(newOffset, newLength, i == 0,
michael@0 1126 MOZ_UTF16("*"), 1,
michael@0 1127 "b", "class=\"moz-txt-star\"",
michael@0 1128 aOutString, structPhrase_strong))
michael@0 1129 {
michael@0 1130 i++;
michael@0 1131 continue;
michael@0 1132 }
michael@0 1133 break;
michael@0 1134 case '/':
michael@0 1135 if (StructPhraseHit(newOffset, newLength, i == 0,
michael@0 1136 MOZ_UTF16("/"), 1,
michael@0 1137 "i", "class=\"moz-txt-slash\"",
michael@0 1138 aOutString, structPhrase_italic))
michael@0 1139 {
michael@0 1140 i++;
michael@0 1141 continue;
michael@0 1142 }
michael@0 1143 break;
michael@0 1144 case '_':
michael@0 1145 if (StructPhraseHit(newOffset, newLength, i == 0,
michael@0 1146 MOZ_UTF16("_"), 1,
michael@0 1147 "span" /* <u> is deprecated */,
michael@0 1148 "class=\"moz-txt-underscore\"",
michael@0 1149 aOutString, structPhrase_underline))
michael@0 1150 {
michael@0 1151 i++;
michael@0 1152 continue;
michael@0 1153 }
michael@0 1154 break;
michael@0 1155 case '|':
michael@0 1156 if (StructPhraseHit(newOffset, newLength, i == 0,
michael@0 1157 MOZ_UTF16("|"), 1,
michael@0 1158 "code", "class=\"moz-txt-verticalline\"",
michael@0 1159 aOutString, structPhrase_code))
michael@0 1160 {
michael@0 1161 i++;
michael@0 1162 continue;
michael@0 1163 }
michael@0 1164 break;
michael@0 1165 }
michael@0 1166 }
michael@0 1167
michael@0 1168 if (doURLs)
michael@0 1169 {
michael@0 1170 switch (aInString[i])
michael@0 1171 {
michael@0 1172 case ':':
michael@0 1173 case '@':
michael@0 1174 case '.':
michael@0 1175 if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase
michael@0 1176 {
michael@0 1177 int32_t replaceBefore;
michael@0 1178 int32_t replaceAfter;
michael@0 1179 if (FindURL(aInString, aInStringLength, i, whattodo,
michael@0 1180 outputHTML, replaceBefore, replaceAfter)
michael@0 1181 && structPhrase_strong + structPhrase_italic +
michael@0 1182 structPhrase_underline + structPhrase_code == 0
michael@0 1183 /* workaround for bug #19445 */ )
michael@0 1184 {
michael@0 1185 aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore);
michael@0 1186 aOutString += outputHTML;
michael@0 1187 i += replaceAfter + 1;
michael@0 1188 continue;
michael@0 1189 }
michael@0 1190 }
michael@0 1191 break;
michael@0 1192 } //switch
michael@0 1193 }
michael@0 1194
michael@0 1195 switch (aInString[i])
michael@0 1196 {
michael@0 1197 // Special symbols
michael@0 1198 case '<':
michael@0 1199 case '>':
michael@0 1200 case '&':
michael@0 1201 EscapeChar(aInString[i], aOutString, false);
michael@0 1202 i++;
michael@0 1203 break;
michael@0 1204 // Normal characters
michael@0 1205 default:
michael@0 1206 aOutString += aInString[i];
michael@0 1207 i++;
michael@0 1208 break;
michael@0 1209 }
michael@0 1210 }
michael@0 1211 }
michael@0 1212
michael@0 1213 void
michael@0 1214 mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString)
michael@0 1215 {
michael@0 1216 // some common variables we were recalculating
michael@0 1217 // every time inside the for loop...
michael@0 1218 int32_t lengthOfInString = aInString.Length();
michael@0 1219 const char16_t * uniBuffer = aInString.get();
michael@0 1220
michael@0 1221 #ifdef DEBUG_BenB_Perf
michael@0 1222 PRTime parsing_start = PR_IntervalNow();
michael@0 1223 #endif
michael@0 1224
michael@0 1225 // Look for simple entities not included in a tags and scan them.
michael@0 1226 /* Skip all tags ("<[...]>") and content in an a tag ("<a[...]</a>")
michael@0 1227 or in a tag ("<!--[...]-->").
michael@0 1228 Unescape the rest (text between tags) and pass it to ScanTXT. */
michael@0 1229 for (int32_t i = 0; i < lengthOfInString;)
michael@0 1230 {
michael@0 1231 if (aInString[i] == '<') // html tag
michael@0 1232 {
michael@0 1233 uint32_t start = uint32_t(i);
michael@0 1234 if (nsCRT::ToLower((char)aInString[uint32_t(i) + 1]) == 'a')
michael@0 1235 // if a tag, skip until </a>
michael@0 1236 {
michael@0 1237 i = aInString.Find("</a>", true, i);
michael@0 1238 if (i == kNotFound)
michael@0 1239 i = lengthOfInString;
michael@0 1240 else
michael@0 1241 i += 4;
michael@0 1242 }
michael@0 1243 else if (aInString[uint32_t(i) + 1] == '!' && aInString[uint32_t(i) + 2] == '-' &&
michael@0 1244 aInString[uint32_t(i) + 3] == '-')
michael@0 1245 //if out-commended code, skip until -->
michael@0 1246 {
michael@0 1247 i = aInString.Find("-->", false, i);
michael@0 1248 if (i == kNotFound)
michael@0 1249 i = lengthOfInString;
michael@0 1250 else
michael@0 1251 i += 3;
michael@0 1252
michael@0 1253 }
michael@0 1254 else // just skip tag (attributes etc.)
michael@0 1255 {
michael@0 1256 i = aInString.FindChar('>', i);
michael@0 1257 if (i == kNotFound)
michael@0 1258 i = lengthOfInString;
michael@0 1259 else
michael@0 1260 i++;
michael@0 1261 }
michael@0 1262 aOutString.Append(&uniBuffer[start], uint32_t(i) - start);
michael@0 1263 }
michael@0 1264 else
michael@0 1265 {
michael@0 1266 uint32_t start = uint32_t(i);
michael@0 1267 i = aInString.FindChar('<', i);
michael@0 1268 if (i == kNotFound)
michael@0 1269 i = lengthOfInString;
michael@0 1270
michael@0 1271 nsString tempString;
michael@0 1272 tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate));
michael@0 1273 UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString);
michael@0 1274 ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
michael@0 1275 }
michael@0 1276 }
michael@0 1277
michael@0 1278 #ifdef DEBUG_BenB_Perf
michael@0 1279 printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
michael@0 1280 #endif
michael@0 1281 }
michael@0 1282
michael@0 1283 /****************************************************************************
michael@0 1284 XPCOM Interface
michael@0 1285 *****************************************************************************/
michael@0 1286
michael@0 1287 NS_IMETHODIMP
michael@0 1288 mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream,
michael@0 1289 const char *aFromType,
michael@0 1290 const char *aToType,
michael@0 1291 nsISupports *aCtxt, nsIInputStream **_retval)
michael@0 1292 {
michael@0 1293 return NS_ERROR_NOT_IMPLEMENTED;
michael@0 1294 }
michael@0 1295
michael@0 1296 NS_IMETHODIMP
michael@0 1297 mozTXTToHTMLConv::AsyncConvertData(const char *aFromType,
michael@0 1298 const char *aToType,
michael@0 1299 nsIStreamListener *aListener, nsISupports *aCtxt) {
michael@0 1300 return NS_ERROR_NOT_IMPLEMENTED;
michael@0 1301 }
michael@0 1302
michael@0 1303 NS_IMETHODIMP
michael@0 1304 mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt,
michael@0 1305 nsIInputStream *inStr, uint64_t sourceOffset,
michael@0 1306 uint32_t count)
michael@0 1307 {
michael@0 1308 return NS_ERROR_NOT_IMPLEMENTED;
michael@0 1309 }
michael@0 1310
michael@0 1311 NS_IMETHODIMP
michael@0 1312 mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt)
michael@0 1313 {
michael@0 1314 return NS_ERROR_NOT_IMPLEMENTED;
michael@0 1315 }
michael@0 1316
michael@0 1317 NS_IMETHODIMP
michael@0 1318 mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt,
michael@0 1319 nsresult aStatus)
michael@0 1320 {
michael@0 1321 return NS_ERROR_NOT_IMPLEMENTED;
michael@0 1322 }
michael@0 1323
michael@0 1324 NS_IMETHODIMP
michael@0 1325 mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, uint32_t *logLineStart,
michael@0 1326 uint32_t *_retval)
michael@0 1327 {
michael@0 1328 if (!logLineStart || !_retval || !line)
michael@0 1329 return NS_ERROR_NULL_POINTER;
michael@0 1330 *_retval = CiteLevelTXT(line, *logLineStart);
michael@0 1331 return NS_OK;
michael@0 1332 }
michael@0 1333
michael@0 1334 NS_IMETHODIMP
michael@0 1335 mozTXTToHTMLConv::ScanTXT(const char16_t *text, uint32_t whattodo,
michael@0 1336 char16_t **_retval)
michael@0 1337 {
michael@0 1338 NS_ENSURE_ARG(text);
michael@0 1339
michael@0 1340 // FIX ME!!!
michael@0 1341 nsString outString;
michael@0 1342 int32_t inLength = NS_strlen(text);
michael@0 1343 // by setting a large capacity up front, we save time
michael@0 1344 // when appending characters to the output string because we don't
michael@0 1345 // need to reallocate and re-copy the characters already in the out String.
michael@0 1346 NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
michael@0 1347 if (inLength == 0) {
michael@0 1348 *_retval = NS_strdup(text);
michael@0 1349 return NS_OK;
michael@0 1350 }
michael@0 1351
michael@0 1352 outString.SetCapacity(uint32_t(inLength * growthRate));
michael@0 1353 ScanTXT(text, inLength, whattodo, outString);
michael@0 1354
michael@0 1355 *_retval = ToNewUnicode(outString);
michael@0 1356 return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
michael@0 1357 }
michael@0 1358
michael@0 1359 NS_IMETHODIMP
michael@0 1360 mozTXTToHTMLConv::ScanHTML(const char16_t *text, uint32_t whattodo,
michael@0 1361 char16_t **_retval)
michael@0 1362 {
michael@0 1363 NS_ENSURE_ARG(text);
michael@0 1364
michael@0 1365 // FIX ME!!!
michael@0 1366 nsString outString;
michael@0 1367 nsString inString (text); // look at this nasty extra copy of the entire input buffer!
michael@0 1368 outString.SetCapacity(uint32_t(inString.Length() * growthRate));
michael@0 1369
michael@0 1370 ScanHTML(inString, whattodo, outString);
michael@0 1371 *_retval = ToNewUnicode(outString);
michael@0 1372 return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
michael@0 1373 }
michael@0 1374
michael@0 1375 nsresult
michael@0 1376 MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
michael@0 1377 {
michael@0 1378 NS_PRECONDITION(aConv != nullptr, "null ptr");
michael@0 1379 if (!aConv)
michael@0 1380 return NS_ERROR_NULL_POINTER;
michael@0 1381
michael@0 1382 *aConv = new mozTXTToHTMLConv();
michael@0 1383 if (!*aConv)
michael@0 1384 return NS_ERROR_OUT_OF_MEMORY;
michael@0 1385
michael@0 1386 NS_ADDREF(*aConv);
michael@0 1387 // return (*aConv)->Init();
michael@0 1388 return NS_OK;
michael@0 1389 }

mercurial