The Tor Browser: diff netwerk/streamconv/converters/mozTXTToHTMLConv.cpp

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1389 @@
     1.4 +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "mozTXTToHTMLConv.h"
    1.10 +#include "nsNetUtil.h"
    1.11 +#include "nsUnicharUtils.h"
    1.12 +#include "nsCRT.h"
    1.13 +#include "nsIExternalProtocolHandler.h"
    1.14 +#include "nsIIOService.h"
    1.15 +
    1.16 +#include <algorithm>
    1.17 +
    1.18 +#ifdef DEBUG_BenB_Perf
    1.19 +#include "prtime.h"
    1.20 +#include "prinrval.h"
    1.21 +#endif
    1.22 +
// Growth factor (1.2x). It is not referenced in the part of the file shown
// here.
// NOTE(review): presumably used to pre-size output strings relative to the
// input length -- confirm against the use sites further down.
const double growthRate = 1.2;
    1.24 +
    1.25 +// Bug 183111, editor now replaces multiple spaces with leading
    1.26 +// 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
    1.27 +// 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
    1.28 +// Also recognize the Japanese ideographic space 0x3000 as a space.
    1.29 +static inline bool IsSpace(const char16_t aChar)
    1.30 +{
    1.31 +  return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
    1.32 +}
    1.33 +
    1.34 +// Escape Char will take ch, escape it and append the result to 
    1.35 +// aStringToAppendTo
    1.36 +void
    1.37 +mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo,
    1.38 +                             bool inAttribute)
    1.39 +{
    1.40 +    switch (ch)
    1.41 +    {
    1.42 +    case '<':
    1.43 +      aStringToAppendTo.AppendLiteral("&lt;");
    1.44 +      break;
    1.45 +    case '>':
    1.46 +      aStringToAppendTo.AppendLiteral("&gt;");
    1.47 +      break;
    1.48 +    case '&':
    1.49 +      aStringToAppendTo.AppendLiteral("&amp;");
    1.50 +      break;
    1.51 +    case '"':
    1.52 +      if (inAttribute)
    1.53 +      {
    1.54 +        aStringToAppendTo.AppendLiteral("&quot;");
    1.55 +        break;
    1.56 +      }
    1.57 +      // else fall through
    1.58 +    default:
    1.59 +      aStringToAppendTo += ch;
    1.60 +    }
    1.61 +
    1.62 +    return;
    1.63 +}
    1.64 +
    1.65 +// EscapeStr takes the passed in string and
    1.66 +// escapes it IN PLACE.
    1.67 +void
    1.68 +mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute)
    1.69 +{
    1.70 +  // the replace substring routines
    1.71 +  // don't seem to work if you have a character
    1.72 +  // in the in string that is also in the replacement
    1.73 +  // string! =(
    1.74 +  //aInString.ReplaceSubstring("&", "&amp;");
    1.75 +  //aInString.ReplaceSubstring("<", "&lt;");
    1.76 +  //aInString.ReplaceSubstring(">", "&gt;");
    1.77 +  for (uint32_t i = 0; i < aInString.Length();)
    1.78 +  {
    1.79 +    switch (aInString[i])
    1.80 +    {
    1.81 +    case '<':
    1.82 +      aInString.Cut(i, 1);
    1.83 +      aInString.Insert(NS_LITERAL_STRING("&lt;"), i);
    1.84 +      i += 4; // skip past the integers we just added
    1.85 +      break;
    1.86 +    case '>':
    1.87 +      aInString.Cut(i, 1);
    1.88 +      aInString.Insert(NS_LITERAL_STRING("&gt;"), i);
    1.89 +      i += 4; // skip past the integers we just added
    1.90 +      break;
    1.91 +    case '&':
    1.92 +      aInString.Cut(i, 1);
    1.93 +      aInString.Insert(NS_LITERAL_STRING("&amp;"), i);
    1.94 +      i += 5; // skip past the integers we just added
    1.95 +      break;
    1.96 +    case '"':
    1.97 +      if (inAttribute)
    1.98 +      {
    1.99 +        aInString.Cut(i, 1);
   1.100 +        aInString.Insert(NS_LITERAL_STRING("&quot;"), i);
   1.101 +        i += 6;
   1.102 +        break;
   1.103 +      }
   1.104 +      // else fall through
   1.105 +    default:
   1.106 +      i++;
   1.107 +    }
   1.108 +  }
   1.109 +}
   1.110 +
   1.111 +void 
   1.112 +mozTXTToHTMLConv::UnescapeStr(const char16_t * aInString, int32_t aStartPos, int32_t aLength, nsString& aOutString)
   1.113 +{
   1.114 +  const char16_t * subString = nullptr;
   1.115 +  for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;)
   1.116 +  {
   1.117 +    int32_t remainingChars = i - aStartPos;
   1.118 +    if (aInString[i] == '&')
   1.119 +    {
   1.120 +      subString = &aInString[i];
   1.121 +      if (!nsCRT::strncmp(subString, MOZ_UTF16("&lt;"), std::min(4, aLength - remainingChars)))
   1.122 +      {
   1.123 +        aOutString.Append(char16_t('<'));
   1.124 +        i += 4;
   1.125 +      }
   1.126 +      else if (!nsCRT::strncmp(subString, MOZ_UTF16("&gt;"), std::min(4, aLength - remainingChars)))
   1.127 +      {
   1.128 +        aOutString.Append(char16_t('>'));
   1.129 +        i += 4;
   1.130 +      }
   1.131 +      else if (!nsCRT::strncmp(subString, MOZ_UTF16("&amp;"), std::min(5, aLength - remainingChars)))
   1.132 +      {
   1.133 +        aOutString.Append(char16_t('&'));
   1.134 +        i += 5;
   1.135 +      }
   1.136 +      else if (!nsCRT::strncmp(subString, MOZ_UTF16("&quot;"), std::min(6, aLength - remainingChars)))
   1.137 +      {
   1.138 +        aOutString.Append(char16_t('"'));
   1.139 +        i += 6;
   1.140 +      }
   1.141 +      else
   1.142 +      {
   1.143 +        aOutString += aInString[i];
   1.144 +        i++;
   1.145 +      }
   1.146 +    }
   1.147 +    else
   1.148 +    {
   1.149 +      aOutString += aInString[i];
   1.150 +      i++;
   1.151 +    }
   1.152 +  }
   1.153 +}
   1.154 +
   1.155 +void
   1.156 +mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, 
   1.157 +                                         const uint32_t pos, nsString& aOutString)
   1.158 +{
   1.159 +  NS_ASSERTION(int32_t(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851");
   1.160 +  if (int32_t(pos) >= aInLength)
   1.161 +    return;
   1.162 +
   1.163 +  if (aInString[pos] == '@')
   1.164 +  {
   1.165 +    // only pre-pend a mailto url if the string contains a .domain in it..
   1.166 +    //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
   1.167 +    nsDependentString inString(aInString, aInLength);
   1.168 +    if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign....
   1.169 +    {
   1.170 +      aOutString.AssignLiteral("mailto:");
   1.171 +      aOutString += aInString;
   1.172 +    }
   1.173 +  }
   1.174 +  else if (aInString[pos] == '.')
   1.175 +  {
   1.176 +    if (ItMatchesDelimited(aInString, aInLength,
   1.177 +                           MOZ_UTF16("www."), 4, LT_IGNORE, LT_IGNORE))
   1.178 +    {
   1.179 +      aOutString.AssignLiteral("http://");
   1.180 +      aOutString += aInString;
   1.181 +    }
   1.182 +    else if (ItMatchesDelimited(aInString,aInLength, MOZ_UTF16("ftp."), 4, LT_IGNORE, LT_IGNORE))
   1.183 +    { 
   1.184 +      aOutString.AssignLiteral("ftp://");
   1.185 +      aOutString += aInString;
   1.186 +    }
   1.187 +  }
   1.188 +}
   1.189 +
   1.190 +bool
   1.191 +mozTXTToHTMLConv::FindURLStart(const char16_t * aInString, int32_t aInLength,
   1.192 +                               const uint32_t pos, const modetype check,
   1.193 +                               uint32_t& start)
   1.194 +{
   1.195 +  switch(check)
   1.196 +  { // no breaks, because end of blocks is never reached
   1.197 +  case RFC1738:
   1.198 +  {
   1.199 +    if (!nsCRT::strncmp(&aInString[std::max(int32_t(pos - 4), 0)], MOZ_UTF16("<URL:"), 5))
   1.200 +    {
   1.201 +      start = pos + 1;
   1.202 +      return true;
   1.203 +    }
   1.204 +    else
   1.205 +      return false;
   1.206 +  }
   1.207 +  case RFC2396E:
   1.208 +  {
   1.209 +    nsString temp(aInString, aInLength);
   1.210 +    int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(MOZ_UTF16("<>\""), pos - 1);
   1.211 +    if (i != kNotFound && (temp[uint32_t(i)] == '<' ||
   1.212 +                           temp[uint32_t(i)] == '"'))
   1.213 +    {
   1.214 +      start = uint32_t(++i);
   1.215 +      return start < pos;
   1.216 +    }
   1.217 +    else
   1.218 +      return false;
   1.219 +  }
   1.220 +  case freetext:
   1.221 +  {
   1.222 +    int32_t i = pos - 1;
   1.223 +    for (; i >= 0 && (
   1.224 +         nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) ||
   1.225 +         nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) ||
   1.226 +         aInString[uint32_t(i)] == '+' ||
   1.227 +         aInString[uint32_t(i)] == '-' ||
   1.228 +         aInString[uint32_t(i)] == '.'
   1.229 +         ); i--)
   1.230 +      ;
   1.231 +    if (++i >= 0 && uint32_t(i) < pos && nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]))
   1.232 +    {
   1.233 +      start = uint32_t(i);
   1.234 +      return true;
   1.235 +    }
   1.236 +    else
   1.237 +      return false;
   1.238 +  }
   1.239 +  case abbreviated:
   1.240 +  {
   1.241 +    int32_t i = pos - 1;
   1.242 +    // This disallows non-ascii-characters for email.
   1.243 +    // Currently correct, but revisit later after standards changed.
   1.244 +    bool isEmail = aInString[pos] == (char16_t)'@';
   1.245 +    // These chars mark the start of the URL
   1.246 +    for (; i >= 0
   1.247 +             && aInString[uint32_t(i)] != '>' && aInString[uint32_t(i)] != '<'
   1.248 +             && aInString[uint32_t(i)] != '"' && aInString[uint32_t(i)] != '\''
   1.249 +             && aInString[uint32_t(i)] != '`' && aInString[uint32_t(i)] != ','
   1.250 +             && aInString[uint32_t(i)] != '{' && aInString[uint32_t(i)] != '['
   1.251 +             && aInString[uint32_t(i)] != '(' && aInString[uint32_t(i)] != '|'
   1.252 +             && aInString[uint32_t(i)] != '\\'
   1.253 +             && !IsSpace(aInString[uint32_t(i)])
   1.254 +             && (!isEmail || nsCRT::IsAscii(aInString[uint32_t(i)]))
   1.255 +         ; i--)
   1.256 +      ;
   1.257 +    if
   1.258 +      (
   1.259 +        ++i >= 0 && uint32_t(i) < pos
   1.260 +          &&
   1.261 +          (
   1.262 +            nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) ||
   1.263 +            nsCRT::IsAsciiDigit(aInString[uint32_t(i)])
   1.264 +          )
   1.265 +      )
   1.266 +    {
   1.267 +      start = uint32_t(i);
   1.268 +      return true;
   1.269 +    }
   1.270 +    else
   1.271 +      return false;
   1.272 +  }
   1.273 +  default:
   1.274 +    return false;
   1.275 +  } //switch
   1.276 +}
   1.277 +
   1.278 +bool
   1.279 +mozTXTToHTMLConv::FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
   1.280 +           const modetype check, const uint32_t start, uint32_t& end)
   1.281 +{
   1.282 +  switch(check)
   1.283 +  { // no breaks, because end of blocks is never reached
   1.284 +  case RFC1738:
   1.285 +  case RFC2396E:
   1.286 +  {
   1.287 +    nsString temp(aInString, aInStringLength);
   1.288 +
   1.289 +    int32_t i = temp.FindCharInSet(MOZ_UTF16("<>\""), pos + 1);
   1.290 +    if (i != kNotFound && temp[uint32_t(i--)] ==
   1.291 +        (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"'))
   1.292 +    {
   1.293 +      end = uint32_t(i);
   1.294 +      return end > pos;
   1.295 +    }
   1.296 +    return false;
   1.297 +  }
   1.298 +  case freetext:
   1.299 +  case abbreviated:
   1.300 +  {
   1.301 +    uint32_t i = pos + 1;
   1.302 +    bool isEmail = aInString[pos] == (char16_t)'@';
   1.303 +    bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL
   1.304 +    bool seenOpeningSquareBracket = false; // there is a '[' earlier in the URL
   1.305 +    for (; int32_t(i) < aInStringLength; i++)
   1.306 +    {
   1.307 +      // These chars mark the end of the URL
   1.308 +      if (aInString[i] == '>' || aInString[i] == '<' ||
   1.309 +          aInString[i] == '"' || aInString[i] == '`' ||
   1.310 +          aInString[i] == '}' || aInString[i] == '{' ||
   1.311 +          aInString[i] == '|' ||
   1.312 +          (aInString[i] == ')' && !seenOpeningParenthesis) ||
   1.313 +          (aInString[i] == ']' && !seenOpeningSquareBracket) ||
   1.314 +          // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo.
   1.315 +          (aInString[i] == '[' && i > 2 &&
   1.316 +           (aInString[i - 1] != '/' || aInString[i - 2] != '/')) ||
   1.317 +          IsSpace(aInString[i]))
   1.318 +          break;
   1.319 +      // Disallow non-ascii-characters for email.
   1.320 +      // Currently correct, but revisit later after standards changed.
   1.321 +      if (isEmail && (
   1.322 +            aInString[i] == '(' || aInString[i] == '\'' ||
   1.323 +            !nsCRT::IsAscii(aInString[i])))
   1.324 +          break;
   1.325 +      if (aInString[i] == '(')
   1.326 +        seenOpeningParenthesis = true;
   1.327 +      if (aInString[i] == '[')
   1.328 +        seenOpeningSquareBracket = true;
   1.329 +    }
   1.330 +    // These chars are allowed in the middle of the URL, but not at end.
   1.331 +    // Technically they are, but are used in normal text after the URL.
   1.332 +    while (--i > pos && (
   1.333 +             aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' ||
   1.334 +             aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' ||
   1.335 +             aInString[i] == ':' || aInString[i] == '\''
   1.336 +             ))
   1.337 +        ;
   1.338 +    if (i > pos)
   1.339 +    {
   1.340 +      end = i;
   1.341 +      return true;
   1.342 +    }
   1.343 +    return false;
   1.344 +  }
   1.345 +  default:
   1.346 +    return false;
   1.347 +  } //switch
   1.348 +}
   1.349 +
   1.350 +void
   1.351 +mozTXTToHTMLConv::CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, 
   1.352 +     const uint32_t pos, const uint32_t whathasbeendone,
   1.353 +     const modetype check, const uint32_t start, const uint32_t end,
   1.354 +     nsString& txtURL, nsString& desc,
   1.355 +     int32_t& replaceBefore, int32_t& replaceAfter)
   1.356 +{
   1.357 +  uint32_t descstart = start;
   1.358 +  switch(check)
   1.359 +  {
   1.360 +  case RFC1738:
   1.361 +  {
   1.362 +    descstart = start - 5;
   1.363 +    desc.Append(&aInString[descstart], end - descstart + 2);  // include "<URL:" and ">"
   1.364 +    replaceAfter = end - pos + 1;
   1.365 +  } break;
   1.366 +  case RFC2396E:
   1.367 +  {
   1.368 +    descstart = start - 1;
   1.369 +    desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
   1.370 +    replaceAfter = end - pos + 1;
   1.371 +  } break;
   1.372 +  case freetext:
   1.373 +  case abbreviated:
   1.374 +  {
   1.375 +    descstart = start;
   1.376 +    desc.Append(&aInString[descstart], end - start + 1); // don't include brackets  
   1.377 +    replaceAfter = end - pos;
   1.378 +  } break;
   1.379 +  default: break;
   1.380 +  } //switch
   1.381 +
   1.382 +  EscapeStr(desc, false);
   1.383 +
   1.384 +  txtURL.Append(&aInString[start], end - start + 1);
   1.385 +  txtURL.StripWhitespace();
   1.386 +
   1.387 +  // FIX ME
   1.388 +  nsAutoString temp2;
   1.389 +  ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
   1.390 +  replaceBefore = temp2.Length();
   1.391 +  return;
   1.392 +}
   1.393 +
   1.394 +bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL)
   1.395 +{
   1.396 +  if (!mIOService)
   1.397 +    return false;
   1.398 +
   1.399 +  nsAutoCString scheme;
   1.400 +  nsresult rv = mIOService->ExtractScheme(aURL, scheme);
   1.401 +  if(NS_FAILED(rv))
   1.402 +    return false;
   1.403 +
   1.404 +  // Get the handler for this scheme.
   1.405 +  nsCOMPtr<nsIProtocolHandler> handler;    
   1.406 +  rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
   1.407 +  if(NS_FAILED(rv))
   1.408 +    return false;
   1.409 +
   1.410 +  // Is it an external protocol handler? If not, linkify it.
   1.411 +  nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler);
   1.412 +  if (!externalHandler)
   1.413 +   return true; // handler is built-in, linkify it!
   1.414 +
   1.415 +  // If external app exists for the scheme then linkify it.
   1.416 +  bool exists;
   1.417 +  rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
   1.418 +  return(NS_SUCCEEDED(rv) && exists);
   1.419 +}
   1.420 +
   1.421 +bool
   1.422 +mozTXTToHTMLConv::CheckURLAndCreateHTML(
   1.423 +     const nsString& txtURL, const nsString& desc, const modetype mode,
   1.424 +     nsString& outputHTML)
   1.425 +{
   1.426 +  // Create *uri from txtURL
   1.427 +  nsCOMPtr<nsIURI> uri;
   1.428 +  nsresult rv;
   1.429 +  // Lazily initialize mIOService
   1.430 +  if (!mIOService)
   1.431 +  {
   1.432 +    mIOService = do_GetIOService();
   1.433 +
   1.434 +    if (!mIOService)
   1.435 +      return false;
   1.436 +  }
   1.437 +
   1.438 +  // See if the url should be linkified.
   1.439 +  NS_ConvertUTF16toUTF8 utf8URL(txtURL);
   1.440 +  if (!ShouldLinkify(utf8URL))
   1.441 +    return false;
   1.442 +
   1.443 +  // it would be faster if we could just check to see if there is a protocol
   1.444 +  // handler for the url and return instead of actually trying to create a url...
   1.445 +  rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri));
   1.446 +
   1.447 +  // Real work
   1.448 +  if (NS_SUCCEEDED(rv) && uri)
   1.449 +  {
   1.450 +    outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
   1.451 +    switch(mode)
   1.452 +    {
   1.453 +    case RFC1738:
   1.454 +      outputHTML.AppendLiteral("rfc1738");
   1.455 +      break;
   1.456 +    case RFC2396E:
   1.457 +      outputHTML.AppendLiteral("rfc2396E");
   1.458 +      break;
   1.459 +    case freetext:
   1.460 +      outputHTML.AppendLiteral("freetext");
   1.461 +      break;
   1.462 +    case abbreviated:
   1.463 +      outputHTML.AppendLiteral("abbreviated");
   1.464 +      break;
   1.465 +    default: break;
   1.466 +    }
   1.467 +    nsAutoString escapedURL(txtURL);
   1.468 +    EscapeStr(escapedURL, true);
   1.469 +
   1.470 +    outputHTML.AppendLiteral("\" href=\"");
   1.471 +    outputHTML += escapedURL;
   1.472 +    outputHTML.AppendLiteral("\">");
   1.473 +    outputHTML += desc;
   1.474 +    outputHTML.AppendLiteral("</a>");
   1.475 +    return true;
   1.476 +  }
   1.477 +  else
   1.478 +    return false;
   1.479 +}
   1.480 +
   1.481 +NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t * aInString, int32_t aInLength, int32_t aPos, int32_t * aStartPos, int32_t * aEndPos)
   1.482 +{
   1.483 +  // call FindURL on the passed in string
   1.484 +  nsAutoString outputHTML; // we'll ignore the generated output HTML
   1.485 +
   1.486 +  *aStartPos = -1;
   1.487 +  *aEndPos = -1;
   1.488 +
   1.489 +  FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
   1.490 +
   1.491 +  return NS_OK;
   1.492 +}
   1.493 +
   1.494 +bool
   1.495 +mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
   1.496 +     const uint32_t whathasbeendone,
   1.497 +     nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter)
   1.498 +{
   1.499 +  enum statetype {unchecked, invalid, startok, endok, success};
   1.500 +  static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
   1.501 +
   1.502 +  statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
   1.503 +  /* I don't like this abuse of enums as index for the array,
   1.504 +     but I don't know a better method */
   1.505 +
   1.506 +  // Define, which modes to check
   1.507 +  /* all modes but abbreviated are checked for text[pos] == ':',
   1.508 +     only abbreviated for '.', RFC2396E and abbreviated for '@' */
   1.509 +  for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
   1.510 +       iState = modetype(iState + 1))
   1.511 +    state[iState] = aInString[pos] == ':' ? unchecked : invalid;
   1.512 +  switch (aInString[pos])
   1.513 +  {
   1.514 +  case '@':
   1.515 +    state[RFC2396E] = unchecked;
   1.516 +    // no break here
   1.517 +  case '.':
   1.518 +    state[abbreviated] = unchecked;
   1.519 +    break;
   1.520 +  case ':':
   1.521 +    state[abbreviated] = invalid;
   1.522 +    break;
   1.523 +  default:
   1.524 +    break;
   1.525 +  }
   1.526 +
   1.527 +  // Test, first successful mode wins, sequence defined by |ranking|
   1.528 +  int32_t iCheck = 0;  // the currently tested modetype
   1.529 +  modetype check = ranking[iCheck];
   1.530 +  for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
   1.531 +       iCheck++)
   1.532 +    /* check state from last run.
   1.533 +       If this is the first, check this one, which isn't = success yet */
   1.534 +  {
   1.535 +    check = ranking[iCheck];
   1.536 +
   1.537 +    uint32_t start, end;
   1.538 +
   1.539 +    if (state[check] == unchecked)
   1.540 +      if (FindURLStart(aInString, aInLength, pos, check, start))
   1.541 +        state[check] = startok;
   1.542 +
   1.543 +    if (state[check] == startok)
   1.544 +      if (FindURLEnd(aInString, aInLength, pos, check, start, end))
   1.545 +        state[check] = endok;
   1.546 +
   1.547 +    if (state[check] == endok)
   1.548 +    {
   1.549 +      nsAutoString txtURL, desc;
   1.550 +      int32_t resultReplaceBefore, resultReplaceAfter;
   1.551 +
   1.552 +      CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end,
   1.553 +                             txtURL, desc,
   1.554 +                             resultReplaceBefore, resultReplaceAfter);
   1.555 +
   1.556 +      if (aInString[pos] != ':')
   1.557 +      {
   1.558 +        nsAutoString temp = txtURL;
   1.559 +        txtURL.SetLength(0);
   1.560 +        CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL);
   1.561 +      }
   1.562 +
   1.563 +      if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check,
   1.564 +                                                     outputHTML))
   1.565 +      {
   1.566 +        replaceBefore = resultReplaceBefore;
   1.567 +        replaceAfter = resultReplaceAfter;
   1.568 +        state[check] = success;
   1.569 +      }
   1.570 +    } // if
   1.571 +  } // for
   1.572 +  return state[check] == success;
   1.573 +}
   1.574 +
   1.575 +bool
   1.576 +mozTXTToHTMLConv::ItMatchesDelimited(const char16_t * aInString,
   1.577 +    int32_t aInLength, const char16_t* rep, int32_t aRepLen,
   1.578 +    LIMTYPE before, LIMTYPE after)
   1.579 +{
   1.580 +
   1.581 +  // this little method gets called a LOT. I found we were spending a
   1.582 +  // lot of time just calculating the length of the variable "rep"
   1.583 +  // over and over again every time we called it. So we're now passing
   1.584 +  // an integer in here.
   1.585 +  int32_t textLen = aInLength;
   1.586 +
   1.587 +  if
   1.588 +    (
   1.589 +      ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER))
   1.590 +        && textLen < aRepLen) ||
   1.591 +      ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER))
   1.592 +        && textLen < aRepLen + 1) ||
   1.593 +      (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER
   1.594 +        && textLen < aRepLen + 2)
   1.595 +    )
   1.596 +    return false;
   1.597 +
   1.598 +  char16_t text0 = aInString[0];
   1.599 +  char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
   1.600 +
   1.601 +  if
   1.602 +    (
   1.603 +      (before == LT_ALPHA
   1.604 +        && !nsCRT::IsAsciiAlpha(text0)) ||
   1.605 +      (before == LT_DIGIT
   1.606 +        && !nsCRT::IsAsciiDigit(text0)) ||
   1.607 +      (before == LT_DELIMITER
   1.608 +        &&
   1.609 +        (
   1.610 +          nsCRT::IsAsciiAlpha(text0) ||
   1.611 +          nsCRT::IsAsciiDigit(text0) ||
   1.612 +          text0 == *rep
   1.613 +        )) ||
   1.614 +      (after == LT_ALPHA
   1.615 +        && !nsCRT::IsAsciiAlpha(textAfterPos)) ||
   1.616 +      (after == LT_DIGIT
   1.617 +        && !nsCRT::IsAsciiDigit(textAfterPos)) ||
   1.618 +      (after == LT_DELIMITER
   1.619 +        &&
   1.620 +        (
   1.621 +          nsCRT::IsAsciiAlpha(textAfterPos) ||
   1.622 +          nsCRT::IsAsciiDigit(textAfterPos) ||
   1.623 +          textAfterPos == *rep
   1.624 +        )) ||
   1.625 +        !Substring(Substring(aInString, aInString+aInLength),
   1.626 +                   (before == LT_IGNORE ? 0 : 1),
   1.627 +                   aRepLen).Equals(Substring(rep, rep+aRepLen),
   1.628 +                                   nsCaseInsensitiveStringComparator())
   1.629 +    )
   1.630 +    return false;
   1.631 +
   1.632 +  return true;
   1.633 +}
   1.634 +
   1.635 +uint32_t
   1.636 +mozTXTToHTMLConv::NumberOfMatches(const char16_t * aInString, int32_t aInStringLength, 
   1.637 +     const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after)
   1.638 +{
   1.639 +  uint32_t result = 0;
   1.640 +
   1.641 +  for (int32_t i = 0; i < aInStringLength; i++)
   1.642 +  {
   1.643 +    const char16_t * indexIntoString = &aInString[i];
   1.644 +    if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after))
   1.645 +      result++;
   1.646 +  }
   1.647 +  return result;
   1.648 +}
   1.649 +
   1.650 +
   1.651 +// NOTE: the converted html for the phrase is appended to aOutString
   1.652 +// tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
   1.653 +bool
   1.654 +mozTXTToHTMLConv::StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
   1.655 +     const char16_t* tagTXT, int32_t aTagTXTLen, 
   1.656 +     const char* tagHTML, const char* attributeHTML,
   1.657 +     nsString& aOutString, uint32_t& openTags)
   1.658 +{
   1.659 +  /* We're searching for the following pattern:
   1.660 +     LT_DELIMITER - "*" - ALPHA -
   1.661 +     [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
   1.662 +     <strong> is only inserted, if existence of a pair could be verified
   1.663 +     We use the first opening/closing tag, if we can choose */
   1.664 +
   1.665 +  const char16_t * newOffset = aInString;
   1.666 +  int32_t newLength = aInStringLength;
   1.667 +  if (!col0) // skip the first element?
   1.668 +  {
   1.669 +    newOffset = &aInString[1];
   1.670 +    newLength = aInStringLength - 1;
   1.671 +  }
   1.672 +
   1.673 +  // opening tag
   1.674 +  if
   1.675 +    (
   1.676 +      ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, 
   1.677 +           (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag
   1.678 +        && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen, 
   1.679 +              LT_ALPHA, LT_DELIMITER)  // remaining closing tags
   1.680 +              > openTags
   1.681 +    )
   1.682 +  {
   1.683 +    openTags++;
   1.684 +    aOutString.AppendLiteral("<");
   1.685 +    aOutString.AppendASCII(tagHTML);
   1.686 +    aOutString.Append(char16_t(' '));
   1.687 +    aOutString.AppendASCII(attributeHTML);
   1.688 +    aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
   1.689 +    aOutString.Append(tagTXT);
   1.690 +    aOutString.AppendLiteral("</span>");
   1.691 +    return true;
   1.692 +  }
   1.693 +
   1.694 +  // closing tag
   1.695 +  else if (openTags > 0
   1.696 +       && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER))
   1.697 +  {
   1.698 +    openTags--;
   1.699 +    aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
   1.700 +    aOutString.Append(tagTXT);
   1.701 +    aOutString.AppendLiteral("</span></");
   1.702 +    aOutString.AppendASCII(tagHTML);
   1.703 +    aOutString.Append(char16_t('>'));
   1.704 +    return true;
   1.705 +  }
   1.706 +
   1.707 +  return false;
   1.708 +}
   1.709 +
   1.710 +
   1.711 +bool
   1.712 +mozTXTToHTMLConv::SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
   1.713 +         const char* tagTXT, const char* imageName,
   1.714 +         nsString& outputHTML, int32_t& glyphTextLen)
   1.715 +{
   1.716 +  if ( !aInString || !tagTXT || !imageName )
   1.717 +      return false;
   1.718 +
   1.719 +  int32_t tagLen = strlen(tagTXT);
   1.720 + 
   1.721 +  uint32_t delim = (col0 ? 0 : 1) + tagLen;
   1.722 +
   1.723 +  if
   1.724 +    (
   1.725 +      (col0 || IsSpace(aInString[0]))
   1.726 +        &&
   1.727 +        (
   1.728 +          aLength <= int32_t(delim) ||
   1.729 +          IsSpace(aInString[delim]) ||
   1.730 +          (aLength > int32_t(delim + 1)
   1.731 +            &&
   1.732 +            (
   1.733 +              aInString[delim] == '.' ||
   1.734 +              aInString[delim] == ',' ||
   1.735 +              aInString[delim] == ';' ||
   1.736 +              aInString[delim] == '8' ||
   1.737 +              aInString[delim] == '>' ||
   1.738 +              aInString[delim] == '!' ||
   1.739 +              aInString[delim] == '?'
   1.740 +            )
   1.741 +            && IsSpace(aInString[delim + 1]))
   1.742 +        )
   1.743 +        && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen, 
   1.744 +                              col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
   1.745 +	        // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
   1.746 +    )
   1.747 +  {
   1.748 +    if (!col0)
   1.749 +    {
   1.750 +      outputHTML.Truncate();
   1.751 +      outputHTML.Append(char16_t(' '));
   1.752 +    }
   1.753 +
   1.754 +    outputHTML.AppendLiteral("<span class=\""); // <span class="
   1.755 +    AppendASCIItoUTF16(imageName, outputHTML);  // e.g. smiley-frown
   1.756 +    outputHTML.AppendLiteral("\" title=\"");    // " title="     
   1.757 +    AppendASCIItoUTF16(tagTXT, outputHTML);     // smiley tooltip
   1.758 +    outputHTML.AppendLiteral("\"><span>");      // "><span>      
   1.759 +    AppendASCIItoUTF16(tagTXT, outputHTML);     // original text 
   1.760 +    outputHTML.AppendLiteral("</span></span>"); // </span></span>
   1.761 +    glyphTextLen = (col0 ? 0 : 1) + tagLen;
   1.762 +    return true;
   1.763 +  }
   1.764 +
   1.765 +  return false;
   1.766 +}
   1.767 +
   1.768 +// Tries to match a glyph (smiley, form feed, "+/-" or a power like x^2) at aInString; on a hit its HTML is appended to aOutputString instead of the original string, glyphTextLen receives the number of input chars consumed, and true is returned.
   1.769 +bool
   1.770 +mozTXTToHTMLConv::GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
   1.771 +         nsString& aOutputString, int32_t& glyphTextLen)
   1.772 +{
   1.773 +  char16_t text0 = aInString[0];
   1.774 +  char16_t text1 = (aInLength > 1) ? aInString[1] : char16_t(0);  // do not read past aInLength
   1.775 +  char16_t firstChar = (col0 ? text0 : text1);
   1.776 +  // firstChar is where a smiley would start: text0 when col0 is set, else text1 (text0 then precedes it).
   1.777 +  // temporary variable used to store the glyph html text
   1.778 +  nsAutoString outputHTML;
   1.779 +  bool bTestSmilie;
   1.780 +  bool bArg = false;
   1.781 +  int i;
   1.782 +
   1.783 +  // The same smiley table has to be tried for two cases, one after another; to avoid
   1.784 +  // duplicating it the cases are rolled into a loop: pass 0 tests at firstChar, pass 1
   1.785 +  // (only when col0) tests a smiley at text1 that is preceded by text0 (bArg = false).
   1.786 +
   1.787 +  i = 0;
   1.788 +  while ( i < 2 )
   1.789 +  {
   1.790 +    bTestSmilie = false;
   1.791 +    if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O'))
   1.792 +    {
   1.793 +        // first test passed
   1.794 +
   1.795 +        bTestSmilie = true;
   1.796 +        bArg = col0;
   1.797 +    }
   1.798 +    if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) )
   1.799 +    {
   1.800 +        // second test passed
   1.801 +
   1.802 +        bTestSmilie = true;
   1.803 +        bArg = false;
   1.804 +    }
   1.805 +    if ( bTestSmilie && (
   1.806 +          SmilyHit(aInString, aInLength, bArg,
   1.807 +                   ":-)",
   1.808 +                   "moz-smiley-s1", // smile
   1.809 +                   outputHTML, glyphTextLen) ||
   1.810 +
   1.811 +          SmilyHit(aInString, aInLength, bArg,
   1.812 +                   ":)",
   1.813 +                   "moz-smiley-s1", // smile
   1.814 +                   outputHTML, glyphTextLen) ||
   1.815 +
   1.816 +          SmilyHit(aInString, aInLength, bArg,
   1.817 +                   ":-D",
   1.818 +                   "moz-smiley-s5", // laughing
   1.819 +                   outputHTML, glyphTextLen) ||
   1.820 +
   1.821 +          SmilyHit(aInString, aInLength, bArg,
   1.822 +                   ":-(",
   1.823 +                   "moz-smiley-s2", // frown
   1.824 +                   outputHTML, glyphTextLen) ||
   1.825 +
   1.826 +          SmilyHit(aInString, aInLength, bArg,
   1.827 +                   ":(",
   1.828 +                   "moz-smiley-s2", // frown
   1.829 +                   outputHTML, glyphTextLen) ||
   1.830 +
   1.831 +          SmilyHit(aInString, aInLength, bArg,
   1.832 +                   ":-[",
   1.833 +                   "moz-smiley-s6", // embarrassed
   1.834 +                   outputHTML, glyphTextLen) ||
   1.835 +
   1.836 +          SmilyHit(aInString, aInLength, bArg,
   1.837 +                   ";-)",
   1.838 +                   "moz-smiley-s3", // wink
   1.839 +                   outputHTML, glyphTextLen) ||
   1.840 +
   1.841 +          SmilyHit(aInString, aInLength, bArg,
   1.842 +                   ";)",
   1.843 +                   "moz-smiley-s3", // wink
   1.844 +                   outputHTML, glyphTextLen) ||
   1.845 +
   1.846 +          SmilyHit(aInString, aInLength, bArg,
   1.847 +                   ":-\\",
   1.848 +                   "moz-smiley-s7", // undecided
   1.849 +                   outputHTML, glyphTextLen) ||
   1.850 +
   1.851 +          SmilyHit(aInString, aInLength, bArg,
   1.852 +                   ":-P",
   1.853 +                   "moz-smiley-s4", // tongue
   1.854 +                   outputHTML, glyphTextLen) ||
   1.855 +
   1.856 +          SmilyHit(aInString, aInLength, bArg,
   1.857 +                   ";-P",
   1.858 +                   "moz-smiley-s4", // tongue
   1.859 +                   outputHTML, glyphTextLen) ||
   1.860 +
   1.861 +          SmilyHit(aInString, aInLength, bArg,
   1.862 +                   "=-O",
   1.863 +                   "moz-smiley-s8", // surprise
   1.864 +                   outputHTML, glyphTextLen) ||
   1.865 +
   1.866 +          SmilyHit(aInString, aInLength, bArg,
   1.867 +                   ":-*",
   1.868 +                   "moz-smiley-s9", // kiss
   1.869 +                   outputHTML, glyphTextLen) ||
   1.870 +
   1.871 +          SmilyHit(aInString, aInLength, bArg,
   1.872 +                   ">:o",
   1.873 +                   "moz-smiley-s10", // yell
   1.874 +                   outputHTML, glyphTextLen) ||
   1.875 +
   1.876 +          SmilyHit(aInString, aInLength, bArg,
   1.877 +                   ">:-o",
   1.878 +                   "moz-smiley-s10", // yell
   1.879 +                   outputHTML, glyphTextLen) ||
   1.880 +
   1.881 +          SmilyHit(aInString, aInLength, bArg,
   1.882 +                   "8-)",
   1.883 +                   "moz-smiley-s11", // cool
   1.884 +                   outputHTML, glyphTextLen) ||
   1.885 +
   1.886 +          SmilyHit(aInString, aInLength, bArg,
   1.887 +                   ":-$",
   1.888 +                   "moz-smiley-s12", // money
   1.889 +                   outputHTML, glyphTextLen) ||
   1.890 +
   1.891 +          SmilyHit(aInString, aInLength, bArg,
   1.892 +                   ":-!",
   1.893 +                   "moz-smiley-s13", // foot
   1.894 +                   outputHTML, glyphTextLen) ||
   1.895 +
   1.896 +          SmilyHit(aInString, aInLength, bArg,
   1.897 +                   "O:-)",
   1.898 +                   "moz-smiley-s14", // innocent
   1.899 +                   outputHTML, glyphTextLen) ||
   1.900 +
   1.901 +          SmilyHit(aInString, aInLength, bArg,
   1.902 +                   ":'(",
   1.903 +                   "moz-smiley-s15", // cry
   1.904 +                   outputHTML, glyphTextLen) ||
   1.905 +
   1.906 +          SmilyHit(aInString, aInLength, bArg,
   1.907 +                   ":-X",
   1.908 +                   "moz-smiley-s16", // sealed
   1.909 +                   outputHTML, glyphTextLen)
   1.910 +        )
   1.911 +    )
   1.912 +    {
   1.913 +        aOutputString.Append(outputHTML);
   1.914 +        return true;
   1.915 +    }
   1.916 +    i++;
   1.917 +  }
   1.918 +  if (text0 == '\f')
   1.919 +  {
   1.920 +      aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
   1.921 +      glyphTextLen = 1;
   1.922 +      return true;
   1.923 +  }
   1.924 +  if (text0 == '+' || text1 == '+')
   1.925 +  {
   1.926 +    if (ItMatchesDelimited(aInString, aInLength,
   1.927 +                           MOZ_UTF16(" +/-"), 4,
   1.928 +                           LT_IGNORE, LT_IGNORE))
   1.929 +    {
   1.930 +      aOutputString.AppendLiteral(" &plusmn;");
   1.931 +      glyphTextLen = 4;
   1.932 +      return true;
   1.933 +    }
   1.934 +    if (col0 && ItMatchesDelimited(aInString, aInLength,
   1.935 +                                   MOZ_UTF16("+/-"), 3,
   1.936 +                                   LT_IGNORE, LT_IGNORE))
   1.937 +    {
   1.938 +      aOutputString.AppendLiteral("&plusmn;");
   1.939 +      glyphTextLen = 3;
   1.940 +      return true;
   1.941 +    }
   1.942 +  }
   1.943 +
   1.944 +  // x^2  =>  x<sup>2</sup>,   also handle powers x^-2,  x^0.5
   1.945 +  // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
   1.946 +  if
   1.947 +    (
   1.948 +      text1 == '^'
   1.949 +      &&
   1.950 +      (
   1.951 +        nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) ||
   1.952 +        text0 == ')' || text0 == ']' || text0 == '}'
   1.953 +      )
   1.954 +      &&
   1.955 +      (
   1.956 +        (2 < aInLength && nsCRT::IsAsciiDigit(aInString[2])) ||
   1.957 +        (3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3]))
   1.958 +      )
   1.959 +    )
   1.960 +  {
   1.961 +    // Find first non-digit
   1.962 +    int32_t delimPos = 3;  // skip "^" and first digit (or '-')
   1.963 +    for (; delimPos < aInLength
   1.964 +           &&
   1.965 +           (
   1.966 +             nsCRT::IsAsciiDigit(aInString[delimPos]) ||
   1.967 +             (aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
   1.968 +               nsCRT::IsAsciiDigit(aInString[delimPos + 1]))
   1.969 +           );
   1.970 +         delimPos++)
   1.971 +      ;
   1.972 +    // An exponent that is directly followed by a letter (e.g. x^2a) is not treated as a power.
   1.973 +    if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos]))
   1.974 +    {
   1.975 +      return false;
   1.976 +    }
   1.977 +    // Emit the base char, then <sup> with a zero-size span holding the "^", then the exponent text.
   1.978 +    outputHTML.Truncate();
   1.979 +    outputHTML += text0;
   1.980 +    outputHTML.AppendLiteral(
   1.981 +      "<sup class=\"moz-txt-sup\">"
   1.982 +      "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">"
   1.983 +      "^</span>");
   1.984 +
   1.985 +    aOutputString.Append(outputHTML);
   1.986 +    aOutputString.Append(&aInString[2], delimPos - 2);
   1.987 +    aOutputString.AppendLiteral("</sup>");
   1.988 +    // Everything from the base char up to (not including) the delimiter has been consumed.
   1.989 +    glyphTextLen = delimPos /* - 1 + 1 */ ;
   1.990 +    return true;
   1.991 +  }
   1.992 +  /*
   1.993 +   The following strings are not substituted:
   1.994 +   |TXT   |HTML     |Reason
   1.995 +   +------+---------+----------
   1.996 +    ->     &rarr;    Bug #454
   1.997 +    =>     &rArr;    ditto
   1.998 +    <-     &larr;    ditto
   1.999 +    <=     &lArr;    ditto
  1.1000 +    (tm)   &trade;   ditto
  1.1001 +    1/4    &frac14;  is triggered by 1/4 Part 1, 2/4 Part 2, ...
  1.1002 +    3/4    &frac34;  ditto
  1.1003 +    1/2    &frac12;  similar
  1.1004 +  */
  1.1005 +  return false;
  1.1006 +}
  1.1007 +
  1.1008 +/***************************************************************************
  1.1009 +  Library-internal Interface
  1.1010 +****************************************************************************/
  1.1011 +
  1.1012 +mozTXTToHTMLConv::mozTXTToHTMLConv()
  1.1013 +{ // Empty body: the constructor performs no initialization of its own.
  1.1014 +}
  1.1015 +
  1.1016 +mozTXTToHTMLConv::~mozTXTToHTMLConv() 
  1.1017 +{ // Empty body: the destructor releases nothing explicitly.
  1.1018 +}
  1.1019 +
  1.1020 +NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, // XPCOM nsISupports boilerplate for this class; the interfaces it exposes follow
  1.1021 +                  mozITXTToHTMLConv,
  1.1022 +                  nsIStreamConverter,
  1.1023 +                  nsIStreamListener,
  1.1024 +                  nsIRequestObserver)
  1.1025 +
  1.1026 +int32_t
  1.1027 +mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line,
  1.1028 +				    uint32_t& logLineStart)
  1.1029 +{
  1.1030 +  int32_t result = 0;  // number of quote markers counted so far; this is the return value
  1.1031 +  int32_t lineLength = NS_strlen(line);
  1.1032 +  // Counts the quote markers of |line| starting at logLineStart; logLineStart is advanced past every marker that is counted.
  1.1033 +  bool moreCites = true;
  1.1034 +  while (moreCites)
  1.1035 +  {
  1.1036 +    /* E.g. the following lines count as quote:
  1.1037 +
  1.1038 +       > text
  1.1039 +       >text
  1.1040 +       //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
  1.1041 +           > text
  1.1042 +       ] text
  1.1043 +       USER> text
  1.1044 +       USER] text
  1.1045 +       //#endif
  1.1046 +
  1.1047 +       On return, logLineStart is the position of "t" in these examples
  1.1048 +       (one optional space after the marker is skipped).
  1.1049 +    */
  1.1050 +    uint32_t i = logLineStart;
  1.1051 +
  1.1052 +#ifdef QUOTE_RECOGNITION_AGGRESSIVE
  1.1053 +    for (; int32_t(i) < lineLength && IsSpace(line[i]); i++)
  1.1054 +      ;
  1.1055 +    for (; int32_t(i) < lineLength && nsCRT::IsAsciiAlpha(line[i])
  1.1056 +                                   && nsCRT::IsUpper(line[i])   ; i++)
  1.1057 +      ;
  1.1058 +    if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']'))
  1.1059 +#else
  1.1060 +    if (int32_t(i) < lineLength && line[i] == '>')
  1.1061 +#endif
  1.1062 +    {
  1.1063 +      i++;
  1.1064 +      if (int32_t(i) < lineLength && line[i] == ' ')
  1.1065 +        i++;
  1.1066 +      // sendmail/mbox: text starting with ">From " (any case) is not counted as a quote and ends the scan
  1.1067 +      // (presumably an escaped "From " line - confirm). Placed here for performance increase
  1.1068 +      const char16_t * indexString = &line[logLineStart];
  1.1069 +           // here, |logLineStart < lineLength| is always true
  1.1070 +      uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString));
  1.1071 +      if (Substring(indexString,
  1.1072 +                    indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength),
  1.1073 +                                                  nsCaseInsensitiveStringComparator()))
  1.1074 +        //XXX RFC2646. NOTE(review): only minlength chars are compared, so a remainder that is a mere prefix of ">From " (e.g. a bare ">") also ends the scan here - confirm intended
  1.1075 +        moreCites = false;
  1.1076 +      else
  1.1077 +      {
  1.1078 +        result++;
  1.1079 +        logLineStart = i;
  1.1080 +      }
  1.1081 +    }
  1.1082 +    else
  1.1083 +      moreCites = false;
  1.1084 +  }
  1.1085 +
  1.1086 +  return result;
  1.1087 +}
  1.1088 +
  1.1089 +void
  1.1090 +mozTXTToHTMLConv::ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString)
  1.1091 +{
  1.1092 +  bool doURLs = 0 != (whattodo & kURLs);
  1.1093 +  bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
  1.1094 +  bool doStructPhrase = 0 != (whattodo & kStructPhrase);
  1.1095 +  // Converts aInStringLength chars of plain text to HTML appended to aOutString; whattodo selects the optional steps above.
  1.1096 +  uint32_t structPhrase_strong = 0;  // Number of currently open tags
  1.1097 +  uint32_t structPhrase_underline = 0;
  1.1098 +  uint32_t structPhrase_italic = 0;
  1.1099 +  uint32_t structPhrase_code = 0;
  1.1100 +
  1.1101 +  nsAutoString outputHTML;  // moved here for performance increase
  1.1102 +  // Each pass looks at position i: the first recognizer that hits consumes its text, otherwise one char is escaped/copied.
  1.1103 +  for(uint32_t i = 0; int32_t(i) < aInStringLength;)
  1.1104 +  {
  1.1105 +    if (doGlyphSubstitution)
  1.1106 +    {
  1.1107 +      int32_t glyphTextLen;
  1.1108 +      if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen))
  1.1109 +      {
  1.1110 +        i += glyphTextLen;
  1.1111 +        continue;
  1.1112 +      }
  1.1113 +    }
  1.1114 +
  1.1115 +    if (doStructPhrase)
  1.1116 +    {
  1.1117 +      const char16_t * newOffset = aInString;
  1.1118 +      int32_t newLength = aInStringLength;
  1.1119 +      if (i > 0 ) // not at the start: the range handed to StructPhraseHit begins one char before i
  1.1120 +      {
  1.1121 +        newOffset = &aInString[i-1];
  1.1122 +        newLength = aInStringLength - i + 1;
  1.1123 +      }
  1.1124 +
  1.1125 +      switch (aInString[i]) // Performance increase
  1.1126 +      {
  1.1127 +      case '*':
  1.1128 +        if (StructPhraseHit(newOffset, newLength, i == 0,
  1.1129 +                            MOZ_UTF16("*"), 1,
  1.1130 +                            "b", "class=\"moz-txt-star\"",
  1.1131 +                            aOutString, structPhrase_strong))
  1.1132 +        {
  1.1133 +          i++;
  1.1134 +          continue;
  1.1135 +        }
  1.1136 +        break;
  1.1137 +      case '/':
  1.1138 +        if (StructPhraseHit(newOffset, newLength, i == 0,
  1.1139 +                            MOZ_UTF16("/"), 1,
  1.1140 +                            "i", "class=\"moz-txt-slash\"",
  1.1141 +                            aOutString, structPhrase_italic))
  1.1142 +        {
  1.1143 +          i++;
  1.1144 +          continue;
  1.1145 +        }
  1.1146 +        break;
  1.1147 +      case '_':
  1.1148 +        if (StructPhraseHit(newOffset, newLength, i == 0,
  1.1149 +                            MOZ_UTF16("_"), 1,
  1.1150 +                            "span" /* <u> is deprecated */,
  1.1151 +                            "class=\"moz-txt-underscore\"",
  1.1152 +                            aOutString, structPhrase_underline))
  1.1153 +        {
  1.1154 +          i++;
  1.1155 +          continue;
  1.1156 +        }
  1.1157 +        break;
  1.1158 +      case '|':
  1.1159 +        if (StructPhraseHit(newOffset, newLength, i == 0,
  1.1160 +                            MOZ_UTF16("|"), 1,
  1.1161 +                            "code", "class=\"moz-txt-verticalline\"",
  1.1162 +                            aOutString, structPhrase_code))
  1.1163 +        {
  1.1164 +          i++;
  1.1165 +          continue;
  1.1166 +        }
  1.1167 +        break;
  1.1168 +      }
  1.1169 +    }
  1.1170 +
  1.1171 +    if (doURLs)
  1.1172 +    {
  1.1173 +      switch (aInString[i])
  1.1174 +      {
  1.1175 +      case ':':
  1.1176 +      case '@':
  1.1177 +      case '.':
  1.1178 +        if ( (i == 0 || aInString[i - 1] != ' ') && (int32_t(i) + 1 >= aInStringLength || aInString[i + 1] != ' ') ) // Performance increase: only try FindURL when neither neighbour is a space; never read past aInStringLength
  1.1179 +        {
  1.1180 +          int32_t replaceBefore;
  1.1181 +          int32_t replaceAfter;
  1.1182 +          if (FindURL(aInString, aInStringLength, i, whattodo,
  1.1183 +                      outputHTML, replaceBefore, replaceAfter)
  1.1184 +                  && structPhrase_strong + structPhrase_italic +
  1.1185 +                       structPhrase_underline + structPhrase_code == 0
  1.1186 +                       /* workaround for bug #19445 */ )
  1.1187 +          {
  1.1188 +            aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore);
  1.1189 +            aOutString += outputHTML;
  1.1190 +            i += replaceAfter + 1;
  1.1191 +            continue;
  1.1192 +          }
  1.1193 +        }
  1.1194 +        break;
  1.1195 +      } //switch
  1.1196 +    }
  1.1197 +
  1.1198 +    switch (aInString[i])
  1.1199 +    {
  1.1200 +    // Special symbols
  1.1201 +    case '<':
  1.1202 +    case '>':
  1.1203 +    case '&':
  1.1204 +      EscapeChar(aInString[i], aOutString, false);
  1.1205 +      i++;
  1.1206 +      break;
  1.1207 +    // Normal characters
  1.1208 +    default:
  1.1209 +      aOutString += aInString[i];
  1.1210 +      i++;
  1.1211 +      break;
  1.1212 +    }
  1.1213 +  }
  1.1214 +}
  1.1215 +
  1.1216 +void
  1.1217 +mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString)
  1.1218 +{
  1.1219 +  // some common variables we were recalculating
  1.1220 +  // every time inside the for loop...
  1.1221 +  int32_t lengthOfInString = aInString.Length();
  1.1222 +  const char16_t * uniBuffer = aInString.get();
  1.1223 +
  1.1224 +#ifdef DEBUG_BenB_Perf
  1.1225 +  PRTime parsing_start = PR_IntervalNow();
  1.1226 +#endif
  1.1227 +
  1.1228 +  // Walk the input: markup is appended to aOutString unchanged, only the text between tags is scanned.
  1.1229 +  /* Skip all tags ("<[...]>"), the content of an a element ("<a[...]</a>")
  1.1230 +     and comments ("<!--[...]-->").
  1.1231 +     Unescape the rest (text between tags) and pass it to ScanTXT. */
  1.1232 +  for (int32_t i = 0; i < lengthOfInString;)
  1.1233 +  {
  1.1234 +    if (aInString[i] == '<')  // html tag
  1.1235 +    {
  1.1236 +      uint32_t start = uint32_t(i);
  1.1237 +      if (i + 2 < lengthOfInString && nsCRT::ToLower((char)aInString[uint32_t(i) + 1]) == 'a' &&
  1.1238 +          (IsSpace(aInString[uint32_t(i) + 2]) || aInString[uint32_t(i) + 2] == '>'))  // a real a tag (not e.g. "<abbr>"): skip until </a>
  1.1239 +      {
  1.1240 +        i = aInString.Find("</a>", true, i);
  1.1241 +        if (i == kNotFound)
  1.1242 +          i = lengthOfInString;
  1.1243 +        else
  1.1244 +          i += 4;
  1.1245 +      }
  1.1246 +      else if (i + 3 < lengthOfInString && aInString[uint32_t(i) + 1] == '!' &&
  1.1247 +        aInString[uint32_t(i) + 2] == '-' && aInString[uint32_t(i) + 3] == '-')
  1.1248 +          // if commented-out code, skip until -->
  1.1249 +      {
  1.1250 +        i = aInString.Find("-->", false, i);
  1.1251 +        if (i == kNotFound)
  1.1252 +          i = lengthOfInString;
  1.1253 +        else
  1.1254 +          i += 3;
  1.1255 +
  1.1256 +      }
  1.1257 +      else  // just skip tag (attributes etc.)
  1.1258 +      {
  1.1259 +        i = aInString.FindChar('>', i);
  1.1260 +        if (i == kNotFound)
  1.1261 +          i = lengthOfInString;
  1.1262 +        else
  1.1263 +          i++;
  1.1264 +      }
  1.1265 +      aOutString.Append(&uniBuffer[start], uint32_t(i) - start);
  1.1266 +    }
  1.1267 +    else
  1.1268 +    {
  1.1269 +      uint32_t start = uint32_t(i);
  1.1270 +      i = aInString.FindChar('<', i);
  1.1271 +      if (i == kNotFound)
  1.1272 +        i = lengthOfInString;
  1.1273 +      // Text run [start, i): unescape it into a temporary and let ScanTXT append the converted text.
  1.1274 +      nsString tempString;
  1.1275 +      tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate));
  1.1276 +      UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString);
  1.1277 +      ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
  1.1278 +    }
  1.1279 +  }
  1.1280 +
  1.1281 +#ifdef DEBUG_BenB_Perf
  1.1282 +  printf("ScanHTML time:    %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
  1.1283 +#endif
  1.1284 +}
  1.1285 +
  1.1286 +/****************************************************************************
  1.1287 +  XPCOM Interface
  1.1288 +*****************************************************************************/
  1.1289 +
  1.1290 +NS_IMETHODIMP
  1.1291 +mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream,
  1.1292 +                          const char *aFromType,
  1.1293 +                          const char *aToType,
  1.1294 +                          nsISupports *aCtxt, nsIInputStream **_retval)
  1.1295 +{
  1.1296 +  return NS_ERROR_NOT_IMPLEMENTED;  // stub: always fails; all arguments are ignored and *_retval is not set
  1.1297 +}
  1.1298 +
  1.1299 +NS_IMETHODIMP
  1.1300 +mozTXTToHTMLConv::AsyncConvertData(const char *aFromType,
  1.1301 +                                   const char *aToType,
  1.1302 +                                   nsIStreamListener *aListener, nsISupports *aCtxt) {
  1.1303 +  return NS_ERROR_NOT_IMPLEMENTED;  // stub: always fails; all arguments are ignored
  1.1304 +}
  1.1305 +
  1.1306 +NS_IMETHODIMP
  1.1307 +mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt,
  1.1308 +                                 nsIInputStream *inStr, uint64_t sourceOffset,
  1.1309 +                                 uint32_t count)
  1.1310 +{
  1.1311 +  return NS_ERROR_NOT_IMPLEMENTED;  // stub: always fails; nothing is read from inStr
  1.1312 +}
  1.1313 +
  1.1314 +NS_IMETHODIMP
  1.1315 +mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt)
  1.1316 +{
  1.1317 +  return NS_ERROR_NOT_IMPLEMENTED;  // stub: always fails; all arguments are ignored
  1.1318 +}
  1.1319 +
  1.1320 +NS_IMETHODIMP
  1.1321 +mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt,
  1.1322 +                                nsresult aStatus)
  1.1323 +{
  1.1324 +  return NS_ERROR_NOT_IMPLEMENTED;  // stub: always fails; all arguments are ignored
  1.1325 +}
  1.1326 +
  1.1327 +NS_IMETHODIMP
  1.1328 +mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, uint32_t *logLineStart,
  1.1329 +				uint32_t *_retval)
  1.1330 +{ // XPCOM wrapper around the reference-taking CiteLevelTXT overload.
  1.1331 +   if (!logLineStart || !_retval || !line)
  1.1332 +     return NS_ERROR_NULL_POINTER;
  1.1333 +   *_retval = CiteLevelTXT(line, *logLineStart); // in/out: *logLineStart may be advanced by the callee
  1.1334 +   return NS_OK;
  1.1335 +}
  1.1336 +
  1.1337 +NS_IMETHODIMP
  1.1338 +mozTXTToHTMLConv::ScanTXT(const char16_t *text, uint32_t whattodo,
  1.1339 +			   char16_t **_retval)
  1.1340 +{
  1.1341 +  NS_ENSURE_ARG(text);
  1.1342 +  // XPCOM wrapper: converts the NUL-terminated |text| and returns a newly allocated result string in *_retval.
  1.1343 +  // FIX ME!!!
  1.1344 +  nsString outString;
  1.1345 +  int32_t inLength = NS_strlen(text);
  1.1346 +  // Empty input needs no scanning (and is unexpected, hence the assertion):
  1.1347 +  // hand back a copy of it, reporting an allocation failure the same way
  1.1348 +  // the normal path below does.
  1.1349 +  NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
  1.1350 +  if (inLength == 0) {
  1.1351 +    *_retval = NS_strdup(text);
  1.1352 +    return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
  1.1353 +  }
  1.1354 +  // by setting a large capacity up front, we save time when appending to the output string (fewer reallocations and re-copies).
  1.1355 +  outString.SetCapacity(uint32_t(inLength * growthRate));
  1.1356 +  ScanTXT(text, inLength, whattodo, outString);
  1.1357 +
  1.1358 +  *_retval = ToNewUnicode(outString);
  1.1359 +  return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
  1.1360 +}
  1.1361 +
  1.1362 +NS_IMETHODIMP
  1.1363 +mozTXTToHTMLConv::ScanHTML(const char16_t *text, uint32_t whattodo,
  1.1364 +			    char16_t **_retval)
  1.1365 +{
  1.1366 +  NS_ENSURE_ARG(text);
  1.1367 +  // XPCOM wrapper: copies |text| into an nsString, runs the internal ScanHTML and returns a newly allocated result in *_retval.
  1.1368 +  // FIX ME!!!
  1.1369 +  nsString outString;
  1.1370 +  nsString inString (text); // look at this nasty extra copy of the entire input buffer!
  1.1371 +  outString.SetCapacity(uint32_t(inString.Length() * growthRate));
  1.1372 +  // Pre-sized above by growthRate so that appending during the scan reallocates less often.
  1.1373 +  ScanHTML(inString, whattodo, outString);
  1.1374 +  *_retval = ToNewUnicode(outString);
  1.1375 +  return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
  1.1376 +}
  1.1377 +
  1.1378 +nsresult
  1.1379 +MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
  1.1380 +{
  1.1381 +    NS_PRECONDITION(aConv != nullptr, "null ptr");
  1.1382 +    if (!aConv)
  1.1383 +      return NS_ERROR_NULL_POINTER;
  1.1384 +    // Factory: creates a new converter and stores it in *aConv with one reference added for the caller.
  1.1385 +    *aConv = new mozTXTToHTMLConv();
  1.1386 +    if (!*aConv)
  1.1387 +      return NS_ERROR_OUT_OF_MEMORY;
  1.1388 +    // The caller receives the reference added here.
  1.1389 +    NS_ADDREF(*aConv);
  1.1390 +    //    return (*aConv)->Init();
  1.1391 +    return NS_OK;
  1.1392 +}
The Tor Browser / file diff

diff: netwerk/streamconv/converters/mozTXTToHTMLConv.cpp

netwerk/streamconv/converters/mozTXTToHTMLConv.cpp