michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsLinebreakConverter.h" michael@0: michael@0: #include "nsMemory.h" michael@0: #include "nsCRT.h" michael@0: michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: GetLinebreakString michael@0: michael@0: Could make this inline michael@0: ----------------------------------------------------------------------------*/ michael@0: static const char* GetLinebreakString(nsLinebreakConverter::ELinebreakType aBreakType) michael@0: { michael@0: static const char* const sLinebreaks[] = { michael@0: "", // any michael@0: NS_LINEBREAK, // platform michael@0: LFSTR, // content michael@0: CRLF, // net michael@0: CRSTR, // Mac michael@0: LFSTR, // Unix michael@0: CRLF, // Windows michael@0: " ", // space michael@0: nullptr michael@0: }; michael@0: michael@0: return sLinebreaks[aBreakType]; michael@0: } michael@0: michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: AppendLinebreak michael@0: michael@0: Wee inline method to append a line break. Modifies ioDest. michael@0: ----------------------------------------------------------------------------*/ michael@0: template michael@0: void AppendLinebreak(T*& ioDest, const char* lineBreakStr) michael@0: { michael@0: *ioDest++ = *lineBreakStr; michael@0: michael@0: if (lineBreakStr[1]) michael@0: *ioDest++ = lineBreakStr[1]; michael@0: } michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: CountChars michael@0: michael@0: Counts occurrences of breakStr in aSrc michael@0: ----------------------------------------------------------------------------*/ michael@0: template michael@0: int32_t CountLinebreaks(const T* aSrc, int32_t inLen, const char* breakStr) michael@0: { michael@0: const T* src = aSrc; michael@0: const T* srcEnd = aSrc + inLen; michael@0: int32_t theCount = 0; michael@0: michael@0: while (src < srcEnd) michael@0: { michael@0: if (*src == *breakStr) michael@0: { michael@0: src++; michael@0: michael@0: if (breakStr[1]) michael@0: { michael@0: if (src < srcEnd && *src == breakStr[1]) michael@0: { michael@0: src++; michael@0: theCount++; michael@0: } michael@0: } michael@0: else michael@0: { michael@0: theCount++; michael@0: } michael@0: } michael@0: else michael@0: { michael@0: src++; michael@0: } michael@0: } michael@0: michael@0: return theCount; michael@0: } michael@0: michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: ConvertBreaks michael@0: michael@0: ioLen *includes* a terminating null, if any michael@0: ----------------------------------------------------------------------------*/ michael@0: template michael@0: static T* ConvertBreaks(const T* inSrc, int32_t& ioLen, const char* srcBreak, const char* destBreak) michael@0: { michael@0: NS_ASSERTION(inSrc && srcBreak && destBreak, "Got a null string"); michael@0: michael@0: T* resultString = nullptr; michael@0: michael@0: // handle the no conversion case michael@0: if (nsCRT::strcmp(srcBreak, destBreak) == 0) michael@0: { michael@0: resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen); michael@0: if (!resultString) return nullptr; michael@0: memcpy(resultString, inSrc, sizeof(T) * ioLen); // includes the null, if any michael@0: return resultString; michael@0: } michael@0: michael@0: int32_t srcBreakLen = strlen(srcBreak); michael@0: int32_t destBreakLen = strlen(destBreak); michael@0: michael@0: // handle the easy case, where the string length does not change, and the michael@0: // breaks are only 1 char long, i.e. CR <-> LF michael@0: if (srcBreakLen == destBreakLen && srcBreakLen == 1) michael@0: { michael@0: resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen); michael@0: if (!resultString) return nullptr; michael@0: michael@0: const T* src = inSrc; michael@0: const T* srcEnd = inSrc + ioLen; // includes null, if any michael@0: T* dst = resultString; michael@0: michael@0: char srcBreakChar = *srcBreak; // we know it's one char long already michael@0: char dstBreakChar = *destBreak; michael@0: michael@0: while (src < srcEnd) michael@0: { michael@0: if (*src == srcBreakChar) michael@0: { michael@0: *dst++ = dstBreakChar; michael@0: src++; michael@0: } michael@0: else michael@0: { michael@0: *dst++ = *src++; michael@0: } michael@0: } michael@0: michael@0: // ioLen does not change michael@0: } michael@0: else michael@0: { michael@0: // src and dest termination is different length. Do it a slower way. michael@0: michael@0: // count linebreaks in src. Assumes that chars in 2-char linebreaks are unique. michael@0: int32_t numLinebreaks = CountLinebreaks(inSrc, ioLen, srcBreak); michael@0: michael@0: int32_t newBufLen = ioLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen); michael@0: resultString = (T *)nsMemory::Alloc(sizeof(T) * newBufLen); michael@0: if (!resultString) return nullptr; michael@0: michael@0: const T* src = inSrc; michael@0: const T* srcEnd = inSrc + ioLen; // includes null, if any michael@0: T* dst = resultString; michael@0: michael@0: while (src < srcEnd) michael@0: { michael@0: if (*src == *srcBreak) michael@0: { michael@0: *dst++ = *destBreak; michael@0: if (destBreak[1]) michael@0: *dst++ = destBreak[1]; michael@0: michael@0: src++; michael@0: if (src < srcEnd && srcBreak[1] && *src == srcBreak[1]) michael@0: src++; michael@0: } michael@0: else michael@0: { michael@0: *dst++ = *src++; michael@0: } michael@0: } michael@0: michael@0: ioLen = newBufLen; michael@0: } michael@0: michael@0: return resultString; michael@0: } michael@0: michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: ConvertBreaksInSitu michael@0: michael@0: Convert breaks in situ. Can only do this if the linebreak length michael@0: does not change. michael@0: ----------------------------------------------------------------------------*/ michael@0: template michael@0: static void ConvertBreaksInSitu(T* inSrc, int32_t inLen, char srcBreak, char destBreak) michael@0: { michael@0: T* src = inSrc; michael@0: T* srcEnd = inSrc + inLen; michael@0: michael@0: while (src < srcEnd) michael@0: { michael@0: if (*src == srcBreak) michael@0: *src = destBreak; michael@0: michael@0: src++; michael@0: } michael@0: } michael@0: michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: ConvertUnknownBreaks michael@0: michael@0: Convert unknown line breaks to the specified break. michael@0: michael@0: This will convert CRLF pairs to one break, and single CR or LF to a break. michael@0: ----------------------------------------------------------------------------*/ michael@0: template michael@0: static T* ConvertUnknownBreaks(const T* inSrc, int32_t& ioLen, const char* destBreak) michael@0: { michael@0: const T* src = inSrc; michael@0: const T* srcEnd = inSrc + ioLen; // includes null, if any michael@0: michael@0: int32_t destBreakLen = strlen(destBreak); michael@0: int32_t finalLen = 0; michael@0: michael@0: while (src < srcEnd) michael@0: { michael@0: if (*src == nsCRT::CR) michael@0: { michael@0: if (src < srcEnd && src[1] == nsCRT::LF) michael@0: { michael@0: // CRLF michael@0: finalLen += destBreakLen; michael@0: src++; michael@0: } michael@0: else michael@0: { michael@0: // Lone CR michael@0: finalLen += destBreakLen; michael@0: } michael@0: } michael@0: else if (*src == nsCRT::LF) michael@0: { michael@0: // Lone LF michael@0: finalLen += destBreakLen; michael@0: } michael@0: else michael@0: { michael@0: finalLen++; michael@0: } michael@0: src++; michael@0: } michael@0: michael@0: T* resultString = (T *)nsMemory::Alloc(sizeof(T) * finalLen); michael@0: if (!resultString) return nullptr; michael@0: michael@0: src = inSrc; michael@0: srcEnd = inSrc + ioLen; // includes null, if any michael@0: michael@0: T* dst = resultString; michael@0: michael@0: while (src < srcEnd) michael@0: { michael@0: if (*src == nsCRT::CR) michael@0: { michael@0: if (src < srcEnd && src[1] == nsCRT::LF) michael@0: { michael@0: // CRLF michael@0: AppendLinebreak(dst, destBreak); michael@0: src++; michael@0: } michael@0: else michael@0: { michael@0: // Lone CR michael@0: AppendLinebreak(dst, destBreak); michael@0: } michael@0: } michael@0: else if (*src == nsCRT::LF) michael@0: { michael@0: // Lone LF michael@0: AppendLinebreak(dst, destBreak); michael@0: } michael@0: else michael@0: { michael@0: *dst++ = *src; michael@0: } michael@0: src++; michael@0: } michael@0: michael@0: ioLen = finalLen; michael@0: return resultString; michael@0: } michael@0: michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: ConvertLineBreaks michael@0: michael@0: ----------------------------------------------------------------------------*/ michael@0: char* nsLinebreakConverter::ConvertLineBreaks(const char* aSrc, michael@0: ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) michael@0: { michael@0: NS_ASSERTION(aDestBreaks != eLinebreakAny && michael@0: aSrcBreaks != eLinebreakSpace, "Invalid parameter"); michael@0: if (!aSrc) return nullptr; michael@0: michael@0: int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen; michael@0: michael@0: char* resultString; michael@0: if (aSrcBreaks == eLinebreakAny) michael@0: resultString = ConvertUnknownBreaks(aSrc, sourceLen, GetLinebreakString(aDestBreaks)); michael@0: else michael@0: resultString = ConvertBreaks(aSrc, sourceLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks)); michael@0: michael@0: if (outLen) michael@0: *outLen = sourceLen; michael@0: return resultString; michael@0: } michael@0: michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: ConvertLineBreaksInSitu michael@0: michael@0: ----------------------------------------------------------------------------*/ michael@0: nsresult nsLinebreakConverter::ConvertLineBreaksInSitu(char **ioBuffer, ELinebreakType aSrcBreaks, michael@0: ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) michael@0: { michael@0: NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed"); michael@0: if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER; michael@0: michael@0: NS_ASSERTION(aDestBreaks != eLinebreakAny && michael@0: aSrcBreaks != eLinebreakSpace, "Invalid parameter"); michael@0: michael@0: int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(*ioBuffer) + 1 : aSrcLen; michael@0: michael@0: // can we convert in-place? michael@0: const char* srcBreaks = GetLinebreakString(aSrcBreaks); michael@0: const char* dstBreaks = GetLinebreakString(aDestBreaks); michael@0: michael@0: if ( (aSrcBreaks != eLinebreakAny) && michael@0: (strlen(srcBreaks) == 1) && michael@0: (strlen(dstBreaks) == 1) ) michael@0: { michael@0: ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks); michael@0: if (outLen) michael@0: *outLen = sourceLen; michael@0: } michael@0: else michael@0: { michael@0: char* destBuffer; michael@0: michael@0: if (aSrcBreaks == eLinebreakAny) michael@0: destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks); michael@0: else michael@0: destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks); michael@0: michael@0: if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY; michael@0: *ioBuffer = destBuffer; michael@0: if (outLen) michael@0: *outLen = sourceLen; michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: ConvertUnicharLineBreaks michael@0: michael@0: ----------------------------------------------------------------------------*/ michael@0: char16_t* nsLinebreakConverter::ConvertUnicharLineBreaks(const char16_t* aSrc, michael@0: ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) michael@0: { michael@0: NS_ASSERTION(aDestBreaks != eLinebreakAny && michael@0: aSrcBreaks != eLinebreakSpace, "Invalid parameter"); michael@0: if (!aSrc) return nullptr; michael@0: michael@0: int32_t bufLen = (aSrcLen == kIgnoreLen) ? NS_strlen(aSrc) + 1 : aSrcLen; michael@0: michael@0: char16_t* resultString; michael@0: if (aSrcBreaks == eLinebreakAny) michael@0: resultString = ConvertUnknownBreaks(aSrc, bufLen, GetLinebreakString(aDestBreaks)); michael@0: else michael@0: resultString = ConvertBreaks(aSrc, bufLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks)); michael@0: michael@0: if (outLen) michael@0: *outLen = bufLen; michael@0: return resultString; michael@0: } michael@0: michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: ConvertStringLineBreaks michael@0: michael@0: ----------------------------------------------------------------------------*/ michael@0: nsresult nsLinebreakConverter::ConvertUnicharLineBreaksInSitu(char16_t **ioBuffer, michael@0: ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) michael@0: { michael@0: NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed"); michael@0: if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER; michael@0: NS_ASSERTION(aDestBreaks != eLinebreakAny && michael@0: aSrcBreaks != eLinebreakSpace, "Invalid parameter"); michael@0: michael@0: int32_t sourceLen = (aSrcLen == kIgnoreLen) ? NS_strlen(*ioBuffer) + 1 : aSrcLen; michael@0: michael@0: // can we convert in-place? michael@0: const char* srcBreaks = GetLinebreakString(aSrcBreaks); michael@0: const char* dstBreaks = GetLinebreakString(aDestBreaks); michael@0: michael@0: if ( (aSrcBreaks != eLinebreakAny) && michael@0: (strlen(srcBreaks) == 1) && michael@0: (strlen(dstBreaks) == 1) ) michael@0: { michael@0: ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks); michael@0: if (outLen) michael@0: *outLen = sourceLen; michael@0: } michael@0: else michael@0: { michael@0: char16_t* destBuffer; michael@0: michael@0: if (aSrcBreaks == eLinebreakAny) michael@0: destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks); michael@0: else michael@0: destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks); michael@0: michael@0: if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY; michael@0: *ioBuffer = destBuffer; michael@0: if (outLen) michael@0: *outLen = sourceLen; michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: /*---------------------------------------------------------------------------- michael@0: ConvertStringLineBreaks michael@0: michael@0: ----------------------------------------------------------------------------*/ michael@0: nsresult nsLinebreakConverter::ConvertStringLineBreaks(nsString& ioString, michael@0: ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks) michael@0: { michael@0: michael@0: NS_ASSERTION(aDestBreaks != eLinebreakAny && michael@0: aSrcBreaks != eLinebreakSpace, "Invalid parameter"); michael@0: michael@0: // nothing to do michael@0: if (ioString.IsEmpty()) return NS_OK; michael@0: michael@0: nsresult rv; michael@0: michael@0: // remember the old buffer in case michael@0: // we blow it away later michael@0: nsString::char_iterator stringBuf; michael@0: ioString.BeginWriting(stringBuf); michael@0: michael@0: int32_t newLen; michael@0: michael@0: rv = ConvertUnicharLineBreaksInSitu(&stringBuf, michael@0: aSrcBreaks, aDestBreaks, michael@0: ioString.Length() + 1, &newLen); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: if (stringBuf != ioString.get()) michael@0: ioString.Adopt(stringBuf); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: michael@0: