/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "mozTXTToHTMLConv.h"
#include "nsNetUtil.h"
#include "nsUnicharUtils.h"
#include "nsCRT.h"
#include "nsIExternalProtocolHandler.h"
#include "nsIIOService.h"

#include <algorithm>

#ifdef DEBUG_BenB_Perf
#include "prtime.h"
#include "prinrval.h"
#endif

const double growthRate = 1.2;

// Bug 183111, editor now replaces multiple spaces with leading
// 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
// 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
// Also recognize the Japanese ideographic space 0x3000 as a space.
michael@0: static inline bool IsSpace(const char16_t aChar) michael@0: { michael@0: return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000); michael@0: } michael@0: michael@0: // Escape Char will take ch, escape it and append the result to michael@0: // aStringToAppendTo michael@0: void michael@0: mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo, michael@0: bool inAttribute) michael@0: { michael@0: switch (ch) michael@0: { michael@0: case '<': michael@0: aStringToAppendTo.AppendLiteral("<"); michael@0: break; michael@0: case '>': michael@0: aStringToAppendTo.AppendLiteral(">"); michael@0: break; michael@0: case '&': michael@0: aStringToAppendTo.AppendLiteral("&"); michael@0: break; michael@0: case '"': michael@0: if (inAttribute) michael@0: { michael@0: aStringToAppendTo.AppendLiteral("""); michael@0: break; michael@0: } michael@0: // else fall through michael@0: default: michael@0: aStringToAppendTo += ch; michael@0: } michael@0: michael@0: return; michael@0: } michael@0: michael@0: // EscapeStr takes the passed in string and michael@0: // escapes it IN PLACE. michael@0: void michael@0: mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute) michael@0: { michael@0: // the replace substring routines michael@0: // don't seem to work if you have a character michael@0: // in the in string that is also in the replacement michael@0: // string! 
=( michael@0: //aInString.ReplaceSubstring("&", "&"); michael@0: //aInString.ReplaceSubstring("<", "<"); michael@0: //aInString.ReplaceSubstring(">", ">"); michael@0: for (uint32_t i = 0; i < aInString.Length();) michael@0: { michael@0: switch (aInString[i]) michael@0: { michael@0: case '<': michael@0: aInString.Cut(i, 1); michael@0: aInString.Insert(NS_LITERAL_STRING("<"), i); michael@0: i += 4; // skip past the integers we just added michael@0: break; michael@0: case '>': michael@0: aInString.Cut(i, 1); michael@0: aInString.Insert(NS_LITERAL_STRING(">"), i); michael@0: i += 4; // skip past the integers we just added michael@0: break; michael@0: case '&': michael@0: aInString.Cut(i, 1); michael@0: aInString.Insert(NS_LITERAL_STRING("&"), i); michael@0: i += 5; // skip past the integers we just added michael@0: break; michael@0: case '"': michael@0: if (inAttribute) michael@0: { michael@0: aInString.Cut(i, 1); michael@0: aInString.Insert(NS_LITERAL_STRING("""), i); michael@0: i += 6; michael@0: break; michael@0: } michael@0: // else fall through michael@0: default: michael@0: i++; michael@0: } michael@0: } michael@0: } michael@0: michael@0: void michael@0: mozTXTToHTMLConv::UnescapeStr(const char16_t * aInString, int32_t aStartPos, int32_t aLength, nsString& aOutString) michael@0: { michael@0: const char16_t * subString = nullptr; michael@0: for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;) michael@0: { michael@0: int32_t remainingChars = i - aStartPos; michael@0: if (aInString[i] == '&') michael@0: { michael@0: subString = &aInString[i]; michael@0: if (!nsCRT::strncmp(subString, MOZ_UTF16("<"), std::min(4, aLength - remainingChars))) michael@0: { michael@0: aOutString.Append(char16_t('<')); michael@0: i += 4; michael@0: } michael@0: else if (!nsCRT::strncmp(subString, MOZ_UTF16(">"), std::min(4, aLength - remainingChars))) michael@0: { michael@0: aOutString.Append(char16_t('>')); michael@0: i += 4; michael@0: } michael@0: else if 
(!nsCRT::strncmp(subString, MOZ_UTF16("&"), std::min(5, aLength - remainingChars))) michael@0: { michael@0: aOutString.Append(char16_t('&')); michael@0: i += 5; michael@0: } michael@0: else if (!nsCRT::strncmp(subString, MOZ_UTF16("""), std::min(6, aLength - remainingChars))) michael@0: { michael@0: aOutString.Append(char16_t('"')); michael@0: i += 6; michael@0: } michael@0: else michael@0: { michael@0: aOutString += aInString[i]; michael@0: i++; michael@0: } michael@0: } michael@0: else michael@0: { michael@0: aOutString += aInString[i]; michael@0: i++; michael@0: } michael@0: } michael@0: } michael@0: michael@0: void michael@0: mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, michael@0: const uint32_t pos, nsString& aOutString) michael@0: { michael@0: NS_ASSERTION(int32_t(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851"); michael@0: if (int32_t(pos) >= aInLength) michael@0: return; michael@0: michael@0: if (aInString[pos] == '@') michael@0: { michael@0: // only pre-pend a mailto url if the string contains a .domain in it.. michael@0: //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm" michael@0: nsDependentString inString(aInString, aInLength); michael@0: if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign.... 
michael@0: { michael@0: aOutString.AssignLiteral("mailto:"); michael@0: aOutString += aInString; michael@0: } michael@0: } michael@0: else if (aInString[pos] == '.') michael@0: { michael@0: if (ItMatchesDelimited(aInString, aInLength, michael@0: MOZ_UTF16("www."), 4, LT_IGNORE, LT_IGNORE)) michael@0: { michael@0: aOutString.AssignLiteral("http://"); michael@0: aOutString += aInString; michael@0: } michael@0: else if (ItMatchesDelimited(aInString,aInLength, MOZ_UTF16("ftp."), 4, LT_IGNORE, LT_IGNORE)) michael@0: { michael@0: aOutString.AssignLiteral("ftp://"); michael@0: aOutString += aInString; michael@0: } michael@0: } michael@0: } michael@0: michael@0: bool michael@0: mozTXTToHTMLConv::FindURLStart(const char16_t * aInString, int32_t aInLength, michael@0: const uint32_t pos, const modetype check, michael@0: uint32_t& start) michael@0: { michael@0: switch(check) michael@0: { // no breaks, because end of blocks is never reached michael@0: case RFC1738: michael@0: { michael@0: if (!nsCRT::strncmp(&aInString[std::max(int32_t(pos - 4), 0)], MOZ_UTF16("\""), pos - 1); michael@0: if (i != kNotFound && (temp[uint32_t(i)] == '<' || michael@0: temp[uint32_t(i)] == '"')) michael@0: { michael@0: start = uint32_t(++i); michael@0: return start < pos; michael@0: } michael@0: else michael@0: return false; michael@0: } michael@0: case freetext: michael@0: { michael@0: int32_t i = pos - 1; michael@0: for (; i >= 0 && ( michael@0: nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) || michael@0: nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) || michael@0: aInString[uint32_t(i)] == '+' || michael@0: aInString[uint32_t(i)] == '-' || michael@0: aInString[uint32_t(i)] == '.' 
michael@0: ); i--) michael@0: ; michael@0: if (++i >= 0 && uint32_t(i) < pos && nsCRT::IsAsciiAlpha(aInString[uint32_t(i)])) michael@0: { michael@0: start = uint32_t(i); michael@0: return true; michael@0: } michael@0: else michael@0: return false; michael@0: } michael@0: case abbreviated: michael@0: { michael@0: int32_t i = pos - 1; michael@0: // This disallows non-ascii-characters for email. michael@0: // Currently correct, but revisit later after standards changed. michael@0: bool isEmail = aInString[pos] == (char16_t)'@'; michael@0: // These chars mark the start of the URL michael@0: for (; i >= 0 michael@0: && aInString[uint32_t(i)] != '>' && aInString[uint32_t(i)] != '<' michael@0: && aInString[uint32_t(i)] != '"' && aInString[uint32_t(i)] != '\'' michael@0: && aInString[uint32_t(i)] != '`' && aInString[uint32_t(i)] != ',' michael@0: && aInString[uint32_t(i)] != '{' && aInString[uint32_t(i)] != '[' michael@0: && aInString[uint32_t(i)] != '(' && aInString[uint32_t(i)] != '|' michael@0: && aInString[uint32_t(i)] != '\\' michael@0: && !IsSpace(aInString[uint32_t(i)]) michael@0: && (!isEmail || nsCRT::IsAscii(aInString[uint32_t(i)])) michael@0: ; i--) michael@0: ; michael@0: if michael@0: ( michael@0: ++i >= 0 && uint32_t(i) < pos michael@0: && michael@0: ( michael@0: nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) || michael@0: nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) michael@0: ) michael@0: ) michael@0: { michael@0: start = uint32_t(i); michael@0: return true; michael@0: } michael@0: else michael@0: return false; michael@0: } michael@0: default: michael@0: return false; michael@0: } //switch michael@0: } michael@0: michael@0: bool michael@0: mozTXTToHTMLConv::FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos, michael@0: const modetype check, const uint32_t start, uint32_t& end) michael@0: { michael@0: switch(check) michael@0: { // no breaks, because end of blocks is never reached michael@0: case RFC1738: michael@0: case RFC2396E: 
michael@0: { michael@0: nsString temp(aInString, aInStringLength); michael@0: michael@0: int32_t i = temp.FindCharInSet(MOZ_UTF16("<>\""), pos + 1); michael@0: if (i != kNotFound && temp[uint32_t(i--)] == michael@0: (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"')) michael@0: { michael@0: end = uint32_t(i); michael@0: return end > pos; michael@0: } michael@0: return false; michael@0: } michael@0: case freetext: michael@0: case abbreviated: michael@0: { michael@0: uint32_t i = pos + 1; michael@0: bool isEmail = aInString[pos] == (char16_t)'@'; michael@0: bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL michael@0: bool seenOpeningSquareBracket = false; // there is a '[' earlier in the URL michael@0: for (; int32_t(i) < aInStringLength; i++) michael@0: { michael@0: // These chars mark the end of the URL michael@0: if (aInString[i] == '>' || aInString[i] == '<' || michael@0: aInString[i] == '"' || aInString[i] == '`' || michael@0: aInString[i] == '}' || aInString[i] == '{' || michael@0: aInString[i] == '|' || michael@0: (aInString[i] == ')' && !seenOpeningParenthesis) || michael@0: (aInString[i] == ']' && !seenOpeningSquareBracket) || michael@0: // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo. michael@0: (aInString[i] == '[' && i > 2 && michael@0: (aInString[i - 1] != '/' || aInString[i - 2] != '/')) || michael@0: IsSpace(aInString[i])) michael@0: break; michael@0: // Disallow non-ascii-characters for email. michael@0: // Currently correct, but revisit later after standards changed. michael@0: if (isEmail && ( michael@0: aInString[i] == '(' || aInString[i] == '\'' || michael@0: !nsCRT::IsAscii(aInString[i]))) michael@0: break; michael@0: if (aInString[i] == '(') michael@0: seenOpeningParenthesis = true; michael@0: if (aInString[i] == '[') michael@0: seenOpeningSquareBracket = true; michael@0: } michael@0: // These chars are allowed in the middle of the URL, but not at end. 
michael@0: // Technically they are, but are used in normal text after the URL. michael@0: while (--i > pos && ( michael@0: aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' || michael@0: aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' || michael@0: aInString[i] == ':' || aInString[i] == '\'' michael@0: )) michael@0: ; michael@0: if (i > pos) michael@0: { michael@0: end = i; michael@0: return true; michael@0: } michael@0: return false; michael@0: } michael@0: default: michael@0: return false; michael@0: } //switch michael@0: } michael@0: michael@0: void michael@0: mozTXTToHTMLConv::CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, michael@0: const uint32_t pos, const uint32_t whathasbeendone, michael@0: const modetype check, const uint32_t start, const uint32_t end, michael@0: nsString& txtURL, nsString& desc, michael@0: int32_t& replaceBefore, int32_t& replaceAfter) michael@0: { michael@0: uint32_t descstart = start; michael@0: switch(check) michael@0: { michael@0: case RFC1738: michael@0: { michael@0: descstart = start - 5; michael@0: desc.Append(&aInString[descstart], end - descstart + 2); // include "" michael@0: replaceAfter = end - pos + 1; michael@0: } break; michael@0: case RFC2396E: michael@0: { michael@0: descstart = start - 1; michael@0: desc.Append(&aInString[descstart], end - descstart + 2); // include brackets michael@0: replaceAfter = end - pos + 1; michael@0: } break; michael@0: case freetext: michael@0: case abbreviated: michael@0: { michael@0: descstart = start; michael@0: desc.Append(&aInString[descstart], end - start + 1); // don't include brackets michael@0: replaceAfter = end - pos; michael@0: } break; michael@0: default: break; michael@0: } //switch michael@0: michael@0: EscapeStr(desc, false); michael@0: michael@0: txtURL.Append(&aInString[start], end - start + 1); michael@0: txtURL.StripWhitespace(); michael@0: michael@0: // FIX ME michael@0: nsAutoString temp2; michael@0: 
ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2); michael@0: replaceBefore = temp2.Length(); michael@0: return; michael@0: } michael@0: michael@0: bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL) michael@0: { michael@0: if (!mIOService) michael@0: return false; michael@0: michael@0: nsAutoCString scheme; michael@0: nsresult rv = mIOService->ExtractScheme(aURL, scheme); michael@0: if(NS_FAILED(rv)) michael@0: return false; michael@0: michael@0: // Get the handler for this scheme. michael@0: nsCOMPtr handler; michael@0: rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler)); michael@0: if(NS_FAILED(rv)) michael@0: return false; michael@0: michael@0: // Is it an external protocol handler? If not, linkify it. michael@0: nsCOMPtr externalHandler = do_QueryInterface(handler); michael@0: if (!externalHandler) michael@0: return true; // handler is built-in, linkify it! michael@0: michael@0: // If external app exists for the scheme then linkify it. michael@0: bool exists; michael@0: rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists); michael@0: return(NS_SUCCEEDED(rv) && exists); michael@0: } michael@0: michael@0: bool michael@0: mozTXTToHTMLConv::CheckURLAndCreateHTML( michael@0: const nsString& txtURL, const nsString& desc, const modetype mode, michael@0: nsString& outputHTML) michael@0: { michael@0: // Create *uri from txtURL michael@0: nsCOMPtr uri; michael@0: nsresult rv; michael@0: // Lazily initialize mIOService michael@0: if (!mIOService) michael@0: { michael@0: mIOService = do_GetIOService(); michael@0: michael@0: if (!mIOService) michael@0: return false; michael@0: } michael@0: michael@0: // See if the url should be linkified. 
michael@0: NS_ConvertUTF16toUTF8 utf8URL(txtURL); michael@0: if (!ShouldLinkify(utf8URL)) michael@0: return false; michael@0: michael@0: // it would be faster if we could just check to see if there is a protocol michael@0: // handler for the url and return instead of actually trying to create a url... michael@0: rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri)); michael@0: michael@0: // Real work michael@0: if (NS_SUCCEEDED(rv) && uri) michael@0: { michael@0: outputHTML.AssignLiteral(""); michael@0: outputHTML += desc; michael@0: outputHTML.AppendLiteral(""); michael@0: return true; michael@0: } michael@0: else michael@0: return false; michael@0: } michael@0: michael@0: NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t * aInString, int32_t aInLength, int32_t aPos, int32_t * aStartPos, int32_t * aEndPos) michael@0: { michael@0: // call FindURL on the passed in string michael@0: nsAutoString outputHTML; // we'll ignore the generated output HTML michael@0: michael@0: *aStartPos = -1; michael@0: *aEndPos = -1; michael@0: michael@0: FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: bool michael@0: mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos, michael@0: const uint32_t whathasbeendone, michael@0: nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter) michael@0: { michael@0: enum statetype {unchecked, invalid, startok, endok, success}; michael@0: static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated}; michael@0: michael@0: statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode michael@0: /* I don't like this abuse of enums as index for the array, michael@0: but I don't know a better method */ michael@0: michael@0: // Define, which modes to check michael@0: /* all modes but abbreviated are checked for text[pos] == ':', michael@0: only 
abbreviated for '.', RFC2396E and abbreviated for '@' */ michael@0: for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode; michael@0: iState = modetype(iState + 1)) michael@0: state[iState] = aInString[pos] == ':' ? unchecked : invalid; michael@0: switch (aInString[pos]) michael@0: { michael@0: case '@': michael@0: state[RFC2396E] = unchecked; michael@0: // no break here michael@0: case '.': michael@0: state[abbreviated] = unchecked; michael@0: break; michael@0: case ':': michael@0: state[abbreviated] = invalid; michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: michael@0: // Test, first successful mode wins, sequence defined by |ranking| michael@0: int32_t iCheck = 0; // the currently tested modetype michael@0: modetype check = ranking[iCheck]; michael@0: for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success; michael@0: iCheck++) michael@0: /* check state from last run. michael@0: If this is the first, check this one, which isn't = success yet */ michael@0: { michael@0: check = ranking[iCheck]; michael@0: michael@0: uint32_t start, end; michael@0: michael@0: if (state[check] == unchecked) michael@0: if (FindURLStart(aInString, aInLength, pos, check, start)) michael@0: state[check] = startok; michael@0: michael@0: if (state[check] == startok) michael@0: if (FindURLEnd(aInString, aInLength, pos, check, start, end)) michael@0: state[check] = endok; michael@0: michael@0: if (state[check] == endok) michael@0: { michael@0: nsAutoString txtURL, desc; michael@0: int32_t resultReplaceBefore, resultReplaceAfter; michael@0: michael@0: CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end, michael@0: txtURL, desc, michael@0: resultReplaceBefore, resultReplaceAfter); michael@0: michael@0: if (aInString[pos] != ':') michael@0: { michael@0: nsAutoString temp = txtURL; michael@0: txtURL.SetLength(0); michael@0: CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL); 
michael@0: } michael@0: michael@0: if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check, michael@0: outputHTML)) michael@0: { michael@0: replaceBefore = resultReplaceBefore; michael@0: replaceAfter = resultReplaceAfter; michael@0: state[check] = success; michael@0: } michael@0: } // if michael@0: } // for michael@0: return state[check] == success; michael@0: } michael@0: michael@0: bool michael@0: mozTXTToHTMLConv::ItMatchesDelimited(const char16_t * aInString, michael@0: int32_t aInLength, const char16_t* rep, int32_t aRepLen, michael@0: LIMTYPE before, LIMTYPE after) michael@0: { michael@0: michael@0: // this little method gets called a LOT. I found we were spending a michael@0: // lot of time just calculating the length of the variable "rep" michael@0: // over and over again every time we called it. So we're now passing michael@0: // an integer in here. michael@0: int32_t textLen = aInLength; michael@0: michael@0: if michael@0: ( michael@0: ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER)) michael@0: && textLen < aRepLen) || michael@0: ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER)) michael@0: && textLen < aRepLen + 1) || michael@0: (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER michael@0: && textLen < aRepLen + 2) michael@0: ) michael@0: return false; michael@0: michael@0: char16_t text0 = aInString[0]; michael@0: char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 
0 : 1)]; michael@0: michael@0: if michael@0: ( michael@0: (before == LT_ALPHA michael@0: && !nsCRT::IsAsciiAlpha(text0)) || michael@0: (before == LT_DIGIT michael@0: && !nsCRT::IsAsciiDigit(text0)) || michael@0: (before == LT_DELIMITER michael@0: && michael@0: ( michael@0: nsCRT::IsAsciiAlpha(text0) || michael@0: nsCRT::IsAsciiDigit(text0) || michael@0: text0 == *rep michael@0: )) || michael@0: (after == LT_ALPHA michael@0: && !nsCRT::IsAsciiAlpha(textAfterPos)) || michael@0: (after == LT_DIGIT michael@0: && !nsCRT::IsAsciiDigit(textAfterPos)) || michael@0: (after == LT_DELIMITER michael@0: && michael@0: ( michael@0: nsCRT::IsAsciiAlpha(textAfterPos) || michael@0: nsCRT::IsAsciiDigit(textAfterPos) || michael@0: textAfterPos == *rep michael@0: )) || michael@0: !Substring(Substring(aInString, aInString+aInLength), michael@0: (before == LT_IGNORE ? 0 : 1), michael@0: aRepLen).Equals(Substring(rep, rep+aRepLen), michael@0: nsCaseInsensitiveStringComparator()) michael@0: ) michael@0: return false; michael@0: michael@0: return true; michael@0: } michael@0: michael@0: uint32_t michael@0: mozTXTToHTMLConv::NumberOfMatches(const char16_t * aInString, int32_t aInStringLength, michael@0: const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after) michael@0: { michael@0: uint32_t result = 0; michael@0: michael@0: for (int32_t i = 0; i < aInStringLength; i++) michael@0: { michael@0: const char16_t * indexIntoString = &aInString[i]; michael@0: if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after)) michael@0: result++; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: michael@0: // NOTE: the converted html for the phrase is appended to aOutString michael@0: // tagHTML and attributeHTML are plain ASCII (literal strings, in fact) michael@0: bool michael@0: mozTXTToHTMLConv::StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0, michael@0: const char16_t* tagTXT, int32_t aTagTXTLen, 
michael@0: const char* tagHTML, const char* attributeHTML, michael@0: nsString& aOutString, uint32_t& openTags) michael@0: { michael@0: /* We're searching for the following pattern: michael@0: LT_DELIMITER - "*" - ALPHA - michael@0: [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER. michael@0: is only inserted, if existence of a pair could be verified michael@0: We use the first opening/closing tag, if we can choose */ michael@0: michael@0: const char16_t * newOffset = aInString; michael@0: int32_t newLength = aInStringLength; michael@0: if (!col0) // skip the first element? michael@0: { michael@0: newOffset = &aInString[1]; michael@0: newLength = aInStringLength - 1; michael@0: } michael@0: michael@0: // opening tag michael@0: if michael@0: ( michael@0: ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, michael@0: (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag michael@0: && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen, michael@0: LT_ALPHA, LT_DELIMITER) // remaining closing tags michael@0: > openTags michael@0: ) michael@0: { michael@0: openTags++; michael@0: aOutString.AppendLiteral("<"); michael@0: aOutString.AppendASCII(tagHTML); michael@0: aOutString.Append(char16_t(' ')); michael@0: aOutString.AppendASCII(attributeHTML); michael@0: aOutString.AppendLiteral(">"); michael@0: aOutString.Append(tagTXT); michael@0: aOutString.AppendLiteral(""); michael@0: return true; michael@0: } michael@0: michael@0: // closing tag michael@0: else if (openTags > 0 michael@0: && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER)) michael@0: { michael@0: openTags--; michael@0: aOutString.AppendLiteral(""); michael@0: aOutString.Append(tagTXT); michael@0: aOutString.AppendLiteral("')); michael@0: return true; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: michael@0: bool michael@0: mozTXTToHTMLConv::SmilyHit(const char16_t * aInString, int32_t 
aLength, bool col0, michael@0: const char* tagTXT, const char* imageName, michael@0: nsString& outputHTML, int32_t& glyphTextLen) michael@0: { michael@0: if ( !aInString || !tagTXT || !imageName ) michael@0: return false; michael@0: michael@0: int32_t tagLen = strlen(tagTXT); michael@0: michael@0: uint32_t delim = (col0 ? 0 : 1) + tagLen; michael@0: michael@0: if michael@0: ( michael@0: (col0 || IsSpace(aInString[0])) michael@0: && michael@0: ( michael@0: aLength <= int32_t(delim) || michael@0: IsSpace(aInString[delim]) || michael@0: (aLength > int32_t(delim + 1) michael@0: && michael@0: ( michael@0: aInString[delim] == '.' || michael@0: aInString[delim] == ',' || michael@0: aInString[delim] == ';' || michael@0: aInString[delim] == '8' || michael@0: aInString[delim] == '>' || michael@0: aInString[delim] == '!' || michael@0: aInString[delim] == '?' michael@0: ) michael@0: && IsSpace(aInString[delim + 1])) michael@0: ) michael@0: && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen, michael@0: col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE) michael@0: // Note: tests at different pos for LT_IGNORE and LT_DELIMITER michael@0: ) michael@0: { michael@0: if (!col0) michael@0: { michael@0: outputHTML.Truncate(); michael@0: outputHTML.Append(char16_t(' ')); michael@0: } michael@0: michael@0: outputHTML.AppendLiteral(""); // "> michael@0: AppendASCIItoUTF16(tagTXT, outputHTML); // original text michael@0: outputHTML.AppendLiteral(""); // michael@0: glyphTextLen = (col0 ? 0 : 1) + tagLen; michael@0: return true; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: // the glyph is appended to aOutputString instead of the original string... 
michael@0: bool michael@0: mozTXTToHTMLConv::GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0, michael@0: nsString& aOutputString, int32_t& glyphTextLen) michael@0: { michael@0: char16_t text0 = aInString[0]; michael@0: char16_t text1 = aInString[1]; michael@0: char16_t firstChar = (col0 ? text0 : text1); michael@0: michael@0: // temporary variable used to store the glyph html text michael@0: nsAutoString outputHTML; michael@0: bool bTestSmilie; michael@0: bool bArg = false; michael@0: int i; michael@0: michael@0: // refactor some of this mess to avoid code duplication and speed execution a bit michael@0: // there are two cases that need to be tried one after another. To avoid a lot of michael@0: // duplicate code, rolling into a loop michael@0: michael@0: i = 0; michael@0: while ( i < 2 ) michael@0: { michael@0: bTestSmilie = false; michael@0: if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O')) michael@0: { michael@0: // first test passed michael@0: michael@0: bTestSmilie = true; michael@0: bArg = col0; michael@0: } michael@0: if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) ) michael@0: { michael@0: // second test passed michael@0: michael@0: bTestSmilie = true; michael@0: bArg = false; michael@0: } michael@0: if ( bTestSmilie && ( michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":-)", michael@0: "moz-smiley-s1", // smile michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":)", michael@0: "moz-smiley-s1", // smile michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":-D", michael@0: "moz-smiley-s5", // laughing michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":-(", michael@0: "moz-smiley-s2", // frown michael@0: 
outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":(", michael@0: "moz-smiley-s2", // frown michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":-[", michael@0: "moz-smiley-s6", // embarassed michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ";-)", michael@0: "moz-smiley-s3", // wink michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, col0, michael@0: ";)", michael@0: "moz-smiley-s3", // wink michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":-\\", michael@0: "moz-smiley-s7", // undecided michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":-P", michael@0: "moz-smiley-s4", // tongue michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ";-P", michael@0: "moz-smiley-s4", // tongue michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: "=-O", michael@0: "moz-smiley-s8", // surprise michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":-*", michael@0: "moz-smiley-s9", // kiss michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ">:o", michael@0: "moz-smiley-s10", // yell michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ">:-o", michael@0: "moz-smiley-s10", // yell michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: "8-)", michael@0: "moz-smiley-s11", // cool michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":-$", michael@0: "moz-smiley-s12", // 
money michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":-!", michael@0: "moz-smiley-s13", // foot michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: "O:-)", michael@0: "moz-smiley-s14", // innocent michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":'(", michael@0: "moz-smiley-s15", // cry michael@0: outputHTML, glyphTextLen) || michael@0: michael@0: SmilyHit(aInString, aInLength, bArg, michael@0: ":-X", michael@0: "moz-smiley-s16", // sealed michael@0: outputHTML, glyphTextLen) michael@0: ) michael@0: ) michael@0: { michael@0: aOutputString.Append(outputHTML); michael@0: return true; michael@0: } michael@0: i++; michael@0: } michael@0: if (text0 == '\f') michael@0: { michael@0: aOutputString.AppendLiteral(""); michael@0: glyphTextLen = 1; michael@0: return true; michael@0: } michael@0: if (text0 == '+' || text1 == '+') michael@0: { michael@0: if (ItMatchesDelimited(aInString, aInLength, michael@0: MOZ_UTF16(" +/-"), 4, michael@0: LT_IGNORE, LT_IGNORE)) michael@0: { michael@0: aOutputString.AppendLiteral(" ±"); michael@0: glyphTextLen = 4; michael@0: return true; michael@0: } michael@0: if (col0 && ItMatchesDelimited(aInString, aInLength, michael@0: MOZ_UTF16("+/-"), 3, michael@0: LT_IGNORE, LT_IGNORE)) michael@0: { michael@0: aOutputString.AppendLiteral("±"); michael@0: glyphTextLen = 3; michael@0: return true; michael@0: } michael@0: } michael@0: michael@0: // x^2 => x2, also handle powers x^-2, x^0.5 michael@0: // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/ michael@0: if michael@0: ( michael@0: text1 == '^' michael@0: && michael@0: ( michael@0: nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) || michael@0: text0 == ')' || text0 == ']' || text0 == '}' michael@0: ) michael@0: && michael@0: ( michael@0: (2 < aInLength && 
nsCRT::IsAsciiDigit(aInString[2])) || michael@0: (3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3])) michael@0: ) michael@0: ) michael@0: { michael@0: // Find first non-digit michael@0: int32_t delimPos = 3; // skip "^" and first digit (or '-') michael@0: for (; delimPos < aInLength michael@0: && michael@0: ( michael@0: nsCRT::IsAsciiDigit(aInString[delimPos]) || michael@0: (aInString[delimPos] == '.' && delimPos + 1 < aInLength && michael@0: nsCRT::IsAsciiDigit(aInString[delimPos + 1])) michael@0: ); michael@0: delimPos++) michael@0: ; michael@0: michael@0: if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos])) michael@0: { michael@0: return false; michael@0: } michael@0: michael@0: outputHTML.Truncate(); michael@0: outputHTML += text0; michael@0: outputHTML.AppendLiteral( michael@0: "" michael@0: "" michael@0: "^"); michael@0: michael@0: aOutputString.Append(outputHTML); michael@0: aOutputString.Append(&aInString[2], delimPos - 2); michael@0: aOutputString.AppendLiteral(""); michael@0: michael@0: glyphTextLen = delimPos /* - 1 + 1 */ ; michael@0: return true; michael@0: } michael@0: /* michael@0: The following strings are not substituted: michael@0: |TXT |HTML |Reason michael@0: +------+---------+---------- michael@0: -> ← Bug #454 michael@0: => ⇐ dito michael@0: <- → dito michael@0: <= ⇒ dito michael@0: (tm) ™ dito michael@0: 1/4 ¼ is triggered by 1/4 Part 1, 2/4 Part 2, ... 
michael@0: 3/4 ¾ dito michael@0: 1/2 ½ similar michael@0: */ michael@0: return false; michael@0: } michael@0: michael@0: /*************************************************************************** michael@0: Library-internal Interface michael@0: ****************************************************************************/ michael@0: michael@0: mozTXTToHTMLConv::mozTXTToHTMLConv() michael@0: { michael@0: } michael@0: michael@0: mozTXTToHTMLConv::~mozTXTToHTMLConv() michael@0: { michael@0: } michael@0: michael@0: NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, michael@0: mozITXTToHTMLConv, michael@0: nsIStreamConverter, michael@0: nsIStreamListener, michael@0: nsIRequestObserver) michael@0: michael@0: int32_t michael@0: mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, michael@0: uint32_t& logLineStart) michael@0: { michael@0: int32_t result = 0; michael@0: int32_t lineLength = NS_strlen(line); michael@0: michael@0: bool moreCites = true; michael@0: while (moreCites) michael@0: { michael@0: /* E.g. 
the following lines count as quote: michael@0: michael@0: > text michael@0: //#ifdef QUOTE_RECOGNITION_AGGRESSIVE michael@0: >text michael@0: //#ifdef QUOTE_RECOGNITION_AGGRESSIVE michael@0: > text michael@0: ] text michael@0: USER> text michael@0: USER] text michael@0: //#endif michael@0: michael@0: logLineStart is the position of "t" in this example michael@0: */ michael@0: uint32_t i = logLineStart; michael@0: michael@0: #ifdef QUOTE_RECOGNITION_AGGRESSIVE michael@0: for (; int32_t(i) < lineLength && IsSpace(line[i]); i++) michael@0: ; michael@0: for (; int32_t(i) < lineLength && nsCRT::IsAsciiAlpha(line[i]) michael@0: && nsCRT::IsUpper(line[i]) ; i++) michael@0: ; michael@0: if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']')) michael@0: #else michael@0: if (int32_t(i) < lineLength && line[i] == '>') michael@0: #endif michael@0: { michael@0: i++; michael@0: if (int32_t(i) < lineLength && line[i] == ' ') michael@0: i++; michael@0: // sendmail/mbox michael@0: // Placed here for performance increase michael@0: const char16_t * indexString = &line[logLineStart]; michael@0: // here, |logLineStart < lineLength| is always true michael@0: uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString)); michael@0: if (Substring(indexString, michael@0: indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength), michael@0: nsCaseInsensitiveStringComparator())) michael@0: //XXX RFC2646 michael@0: moreCites = false; michael@0: else michael@0: { michael@0: result++; michael@0: logLineStart = i; michael@0: } michael@0: } michael@0: else michael@0: moreCites = false; michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: void michael@0: mozTXTToHTMLConv::ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString) michael@0: { michael@0: bool doURLs = 0 != (whattodo & kURLs); michael@0: bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution); michael@0: bool 
doStructPhrase = 0 != (whattodo & kStructPhrase); michael@0: michael@0: uint32_t structPhrase_strong = 0; // Number of currently open tags michael@0: uint32_t structPhrase_underline = 0; michael@0: uint32_t structPhrase_italic = 0; michael@0: uint32_t structPhrase_code = 0; michael@0: michael@0: nsAutoString outputHTML; // moved here for performance increase michael@0: michael@0: for(uint32_t i = 0; int32_t(i) < aInStringLength;) michael@0: { michael@0: if (doGlyphSubstitution) michael@0: { michael@0: int32_t glyphTextLen; michael@0: if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen)) michael@0: { michael@0: i += glyphTextLen; michael@0: continue; michael@0: } michael@0: } michael@0: michael@0: if (doStructPhrase) michael@0: { michael@0: const char16_t * newOffset = aInString; michael@0: int32_t newLength = aInStringLength; michael@0: if (i > 0 ) // skip the first element? michael@0: { michael@0: newOffset = &aInString[i-1]; michael@0: newLength = aInStringLength - i + 1; michael@0: } michael@0: michael@0: switch (aInString[i]) // Performance increase michael@0: { michael@0: case '*': michael@0: if (StructPhraseHit(newOffset, newLength, i == 0, michael@0: MOZ_UTF16("*"), 1, michael@0: "b", "class=\"moz-txt-star\"", michael@0: aOutString, structPhrase_strong)) michael@0: { michael@0: i++; michael@0: continue; michael@0: } michael@0: break; michael@0: case '/': michael@0: if (StructPhraseHit(newOffset, newLength, i == 0, michael@0: MOZ_UTF16("/"), 1, michael@0: "i", "class=\"moz-txt-slash\"", michael@0: aOutString, structPhrase_italic)) michael@0: { michael@0: i++; michael@0: continue; michael@0: } michael@0: break; michael@0: case '_': michael@0: if (StructPhraseHit(newOffset, newLength, i == 0, michael@0: MOZ_UTF16("_"), 1, michael@0: "span" /* is deprecated */, michael@0: "class=\"moz-txt-underscore\"", michael@0: aOutString, structPhrase_underline)) michael@0: { michael@0: i++; michael@0: continue; michael@0: } michael@0: break; 
michael@0: case '|': michael@0: if (StructPhraseHit(newOffset, newLength, i == 0, michael@0: MOZ_UTF16("|"), 1, michael@0: "code", "class=\"moz-txt-verticalline\"", michael@0: aOutString, structPhrase_code)) michael@0: { michael@0: i++; michael@0: continue; michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: michael@0: if (doURLs) michael@0: { michael@0: switch (aInString[i]) michael@0: { michael@0: case ':': michael@0: case '@': michael@0: case '.': michael@0: if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase michael@0: { michael@0: int32_t replaceBefore; michael@0: int32_t replaceAfter; michael@0: if (FindURL(aInString, aInStringLength, i, whattodo, michael@0: outputHTML, replaceBefore, replaceAfter) michael@0: && structPhrase_strong + structPhrase_italic + michael@0: structPhrase_underline + structPhrase_code == 0 michael@0: /* workaround for bug #19445 */ ) michael@0: { michael@0: aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore); michael@0: aOutString += outputHTML; michael@0: i += replaceAfter + 1; michael@0: continue; michael@0: } michael@0: } michael@0: break; michael@0: } //switch michael@0: } michael@0: michael@0: switch (aInString[i]) michael@0: { michael@0: // Special symbols michael@0: case '<': michael@0: case '>': michael@0: case '&': michael@0: EscapeChar(aInString[i], aOutString, false); michael@0: i++; michael@0: break; michael@0: // Normal characters michael@0: default: michael@0: aOutString += aInString[i]; michael@0: i++; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: void michael@0: mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString) michael@0: { michael@0: // some common variables we were recalculating michael@0: // every time inside the for loop... 
michael@0: int32_t lengthOfInString = aInString.Length(); michael@0: const char16_t * uniBuffer = aInString.get(); michael@0: michael@0: #ifdef DEBUG_BenB_Perf michael@0: PRTime parsing_start = PR_IntervalNow(); michael@0: #endif michael@0: michael@0: // Look for simple entities not included in a tags and scan them. michael@0: /* Skip all tags ("<[...]>") and content in an a tag ("") michael@0: or in a tag (""). michael@0: Unescape the rest (text between tags) and pass it to ScanTXT. */ michael@0: for (int32_t i = 0; i < lengthOfInString;) michael@0: { michael@0: if (aInString[i] == '<') // html tag michael@0: { michael@0: uint32_t start = uint32_t(i); michael@0: if (nsCRT::ToLower((char)aInString[uint32_t(i) + 1]) == 'a') michael@0: // if a tag, skip until michael@0: { michael@0: i = aInString.Find("", true, i); michael@0: if (i == kNotFound) michael@0: i = lengthOfInString; michael@0: else michael@0: i += 4; michael@0: } michael@0: else if (aInString[uint32_t(i) + 1] == '!' && aInString[uint32_t(i) + 2] == '-' && michael@0: aInString[uint32_t(i) + 3] == '-') michael@0: //if out-commended code, skip until --> michael@0: { michael@0: i = aInString.Find("-->", false, i); michael@0: if (i == kNotFound) michael@0: i = lengthOfInString; michael@0: else michael@0: i += 3; michael@0: michael@0: } michael@0: else // just skip tag (attributes etc.) 
michael@0: { michael@0: i = aInString.FindChar('>', i); michael@0: if (i == kNotFound) michael@0: i = lengthOfInString; michael@0: else michael@0: i++; michael@0: } michael@0: aOutString.Append(&uniBuffer[start], uint32_t(i) - start); michael@0: } michael@0: else michael@0: { michael@0: uint32_t start = uint32_t(i); michael@0: i = aInString.FindChar('<', i); michael@0: if (i == kNotFound) michael@0: i = lengthOfInString; michael@0: michael@0: nsString tempString; michael@0: tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate)); michael@0: UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString); michael@0: ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString); michael@0: } michael@0: } michael@0: michael@0: #ifdef DEBUG_BenB_Perf michael@0: printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start)); michael@0: #endif michael@0: } michael@0: michael@0: /**************************************************************************** michael@0: XPCOM Interface michael@0: *****************************************************************************/ michael@0: michael@0: NS_IMETHODIMP michael@0: mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream, michael@0: const char *aFromType, michael@0: const char *aToType, michael@0: nsISupports *aCtxt, nsIInputStream **_retval) michael@0: { michael@0: return NS_ERROR_NOT_IMPLEMENTED; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: mozTXTToHTMLConv::AsyncConvertData(const char *aFromType, michael@0: const char *aToType, michael@0: nsIStreamListener *aListener, nsISupports *aCtxt) { michael@0: return NS_ERROR_NOT_IMPLEMENTED; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt, michael@0: nsIInputStream *inStr, uint64_t sourceOffset, michael@0: uint32_t count) michael@0: { michael@0: return NS_ERROR_NOT_IMPLEMENTED; michael@0: } michael@0: michael@0: NS_IMETHODIMP 
michael@0: mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt) michael@0: { michael@0: return NS_ERROR_NOT_IMPLEMENTED; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt, michael@0: nsresult aStatus) michael@0: { michael@0: return NS_ERROR_NOT_IMPLEMENTED; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, uint32_t *logLineStart, michael@0: uint32_t *_retval) michael@0: { michael@0: if (!logLineStart || !_retval || !line) michael@0: return NS_ERROR_NULL_POINTER; michael@0: *_retval = CiteLevelTXT(line, *logLineStart); michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: mozTXTToHTMLConv::ScanTXT(const char16_t *text, uint32_t whattodo, michael@0: char16_t **_retval) michael@0: { michael@0: NS_ENSURE_ARG(text); michael@0: michael@0: // FIX ME!!! michael@0: nsString outString; michael@0: int32_t inLength = NS_strlen(text); michael@0: // by setting a large capacity up front, we save time michael@0: // when appending characters to the output string because we don't michael@0: // need to reallocate and re-copy the characters already in the out String. michael@0: NS_ASSERTION(inLength, "ScanTXT passed 0 length string"); michael@0: if (inLength == 0) { michael@0: *_retval = NS_strdup(text); michael@0: return NS_OK; michael@0: } michael@0: michael@0: outString.SetCapacity(uint32_t(inLength * growthRate)); michael@0: ScanTXT(text, inLength, whattodo, outString); michael@0: michael@0: *_retval = ToNewUnicode(outString); michael@0: return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: mozTXTToHTMLConv::ScanHTML(const char16_t *text, uint32_t whattodo, michael@0: char16_t **_retval) michael@0: { michael@0: NS_ENSURE_ARG(text); michael@0: michael@0: // FIX ME!!! 
michael@0: nsString outString; michael@0: nsString inString (text); // look at this nasty extra copy of the entire input buffer! michael@0: outString.SetCapacity(uint32_t(inString.Length() * growthRate)); michael@0: michael@0: ScanHTML(inString, whattodo, outString); michael@0: *_retval = ToNewUnicode(outString); michael@0: return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: nsresult michael@0: MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv) michael@0: { michael@0: NS_PRECONDITION(aConv != nullptr, "null ptr"); michael@0: if (!aConv) michael@0: return NS_ERROR_NULL_POINTER; michael@0: michael@0: *aConv = new mozTXTToHTMLConv(); michael@0: if (!*aConv) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: michael@0: NS_ADDREF(*aConv); michael@0: // return (*aConv)->Init(); michael@0: return NS_OK; michael@0: }