michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set ts=8 sts=2 et sw=2 tw=80: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: michael@0: #include "prmem.h" michael@0: #include "prprf.h" michael@0: #include "nsICharsetConverterManager.h" michael@0: #include "nsSaveAsCharset.h" michael@0: #include "nsWhitespaceTokenizer.h" michael@0: #include "nsServiceManagerUtils.h" michael@0: michael@0: // michael@0: // nsISupports methods michael@0: // michael@0: NS_IMPL_ISUPPORTS(nsSaveAsCharset, nsISaveAsCharset) michael@0: michael@0: // michael@0: // nsSaveAsCharset michael@0: // michael@0: nsSaveAsCharset::nsSaveAsCharset() michael@0: { michael@0: mAttribute = attr_htmlTextDefault; michael@0: mEntityVersion = 0; michael@0: mCharsetListIndex = -1; michael@0: } michael@0: michael@0: nsSaveAsCharset::~nsSaveAsCharset() michael@0: { michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSaveAsCharset::Init(const char *charset, uint32_t attr, uint32_t entityVersion) michael@0: { michael@0: nsresult rv = NS_OK; michael@0: michael@0: mAttribute = attr; michael@0: mEntityVersion = entityVersion; michael@0: michael@0: rv = SetupCharsetList(charset); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: // set up unicode encoder michael@0: rv = SetupUnicodeEncoder(GetNextCharset()); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: // set up entity converter michael@0: if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter) michael@0: mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv); michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSaveAsCharset::Convert(const char16_t *inString, char **_retval) michael@0: { michael@0: NS_ENSURE_ARG_POINTER(_retval); michael@0: NS_ENSURE_ARG_POINTER(inString); michael@0: if (0 == *inString) michael@0: return NS_ERROR_ILLEGAL_VALUE; michael@0: nsresult rv = NS_OK; michael@0: michael@0: NS_ASSERTION(mEncoder, "need to call Init() before Convert()"); michael@0: NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE); michael@0: michael@0: *_retval = nullptr; michael@0: michael@0: // make sure to start from the first charset in the list michael@0: if (mCharsetListIndex > 0) { michael@0: mCharsetListIndex = -1; michael@0: rv = SetupUnicodeEncoder(GetNextCharset()); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: } michael@0: michael@0: do { michael@0: // fallback to the next charset in the list if the last conversion failed by an unmapped character michael@0: if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) { michael@0: const char * charset = GetNextCharset(); michael@0: if (!charset) michael@0: break; michael@0: rv = SetupUnicodeEncoder(charset); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: PR_FREEIF(*_retval); michael@0: } michael@0: michael@0: if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) { michael@0: NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()"); michael@0: NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE); michael@0: char16_t *entity = nullptr; michael@0: // do the entity conversion first michael@0: rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity); michael@0: if(NS_SUCCEEDED(rv)) { michael@0: rv = DoCharsetConversion(entity, _retval); michael@0: nsMemory::Free(entity); michael@0: } michael@0: } michael@0: else michael@0: rv = DoCharsetConversion(inString, _retval); michael@0: michael@0: } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv); michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSaveAsCharset::GetCharset(char * *aCharset) michael@0: { michael@0: NS_ENSURE_ARG(aCharset); michael@0: NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first"); michael@0: NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE); michael@0: michael@0: const char* charset = mCharsetList[mCharsetListIndex].get(); michael@0: if (!charset) { michael@0: *aCharset = nullptr; michael@0: NS_ASSERTION(charset, "make sure to call Init() with non empty charset list"); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: *aCharset = strdup(charset); michael@0: return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: ///////////////////////////////////////////////////////////////////////////////////////// michael@0: michael@0: #define RESERVE_FALLBACK_BYTES 512 michael@0: michael@0: // do the fallback, reallocate the buffer if necessary michael@0: // need to pass destination buffer info (size, current position and estimation of rest of the conversion) michael@0: NS_IMETHODIMP michael@0: nsSaveAsCharset::HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength, michael@0: int32_t *currentPos, int32_t estimatedLength) michael@0: { michael@0: NS_ENSURE_ARG_POINTER(outString); michael@0: NS_ENSURE_ARG_POINTER(bufferLength); michael@0: NS_ENSURE_ARG_POINTER(currentPos); michael@0: michael@0: char fallbackStr[256]; michael@0: nsresult rv = DoConversionFallBack(character, fallbackStr, 256); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: int32_t tempLen = (int32_t) strlen(fallbackStr); michael@0: michael@0: // reallocate if the buffer is not large enough michael@0: if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) { michael@0: int32_t addLength = tempLen + RESERVE_FALLBACK_BYTES; michael@0: // + 1 is for the terminating NUL, don't add that to bufferLength michael@0: char *temp = (char *) PR_Realloc(*outString, *bufferLength + addLength + 1); michael@0: if (temp) { michael@0: // adjust length/pointer after realloc michael@0: *bufferLength += addLength; michael@0: *outString = temp; michael@0: } else { michael@0: *outString = nullptr; michael@0: *bufferLength = 0; michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: } michael@0: memcpy((*outString + *currentPos), fallbackStr, tempLen); michael@0: *currentPos += tempLen; michael@0: } michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSaveAsCharset::DoCharsetConversion(const char16_t *inString, char **outString) michael@0: { michael@0: NS_ENSURE_ARG_POINTER(outString); michael@0: michael@0: *outString = nullptr; michael@0: michael@0: nsresult rv; michael@0: int32_t inStringLength = NS_strlen(inString); // original input string length michael@0: int32_t bufferLength; // allocated buffer length michael@0: int32_t srcLength = inStringLength; michael@0: int32_t dstLength; michael@0: int32_t pos1, pos2; michael@0: nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING michael@0: michael@0: // estimate and allocate the target buffer (reserve extra memory for fallback) michael@0: rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback michael@0: // + 1 is for the terminating NUL -- we don't add that to bufferLength so that michael@0: // we can always write dstPtr[pos2] = '\0' even when the encoder filled the michael@0: // buffer. michael@0: char *dstPtr = (char *) PR_Malloc(bufferLength + 1); michael@0: if (!dstPtr) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) { michael@0: // convert from unicode michael@0: dstLength = bufferLength - pos2; michael@0: NS_ASSERTION(dstLength >= 0, "out of bounds write"); michael@0: rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength); michael@0: michael@0: pos1 += srcLength ? srcLength : 1; michael@0: pos2 += dstLength; michael@0: dstPtr[pos2] = '\0'; michael@0: michael@0: // break: this is usually the case (no error) OR unrecoverable error michael@0: if (NS_ERROR_UENC_NOMAPPING != rv) break; michael@0: michael@0: // remember this happened and reset the result michael@0: saveResult = rv; michael@0: rv = NS_OK; michael@0: michael@0: // finish encoder, give it a chance to write extra data like escape sequences michael@0: dstLength = bufferLength - pos2; michael@0: rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: pos2 += dstLength; michael@0: dstPtr[pos2] = '\0'; michael@0: } michael@0: michael@0: srcLength = inStringLength - pos1; michael@0: michael@0: // do the fallback michael@0: if (!ATTR_NO_FALLBACK(mAttribute)) { michael@0: uint32_t unMappedChar; michael@0: if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && michael@0: inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) { michael@0: unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]); michael@0: pos1++; michael@0: } else { michael@0: unMappedChar = inString[pos1-1]; michael@0: } michael@0: michael@0: rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength); michael@0: if (NS_FAILED(rv)) michael@0: break; michael@0: michael@0: rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength); michael@0: if (NS_FAILED(rv)) michael@0: break; michael@0: dstPtr[pos2] = '\0'; michael@0: } michael@0: } michael@0: michael@0: if (NS_SUCCEEDED(rv)) { michael@0: // finish encoder, give it a chance to write extra data like escape sequences michael@0: dstLength = bufferLength - pos2; michael@0: rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: pos2 += dstLength; michael@0: dstPtr[pos2] = '\0'; michael@0: } michael@0: } michael@0: michael@0: if (NS_FAILED(rv)) { michael@0: PR_FREEIF(dstPtr); michael@0: return rv; michael@0: } michael@0: michael@0: *outString = dstPtr; // set the result string michael@0: michael@0: // set error code so that the caller can do own fall back michael@0: if (NS_ERROR_UENC_NOMAPPING == saveResult) { michael@0: rv = NS_ERROR_UENC_NOMAPPING; michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSaveAsCharset::DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength) michael@0: { michael@0: NS_ENSURE_ARG_POINTER(outString); michael@0: michael@0: *outString = '\0'; michael@0: michael@0: nsresult rv = NS_OK; michael@0: michael@0: if (ATTR_NO_FALLBACK(mAttribute)) { michael@0: return NS_OK; michael@0: } michael@0: if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) { michael@0: char *entity = nullptr; michael@0: rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: if (!entity || (int32_t)strlen(entity) > bufferLength) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: PL_strcpy(outString, entity); michael@0: nsMemory::Free(entity); michael@0: return rv; michael@0: } michael@0: } michael@0: michael@0: switch (MASK_FALLBACK(mAttribute)) { michael@0: case attr_FallbackQuestionMark: michael@0: if(bufferLength>=2) { michael@0: *outString++='?'; michael@0: *outString='\0'; michael@0: rv = NS_OK; michael@0: } else { michael@0: rv = NS_ERROR_FAILURE; michael@0: } michael@0: break; michael@0: case attr_FallbackEscapeU: michael@0: if (inUCS4 & 0xff0000) michael@0: rv = (PR_snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; michael@0: else michael@0: rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; michael@0: break; michael@0: case attr_FallbackDecimalNCR: michael@0: rv = ( PR_snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; michael@0: break; michael@0: case attr_FallbackHexNCR: michael@0: rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; michael@0: break; michael@0: case attr_FallbackNone: michael@0: rv = NS_OK; michael@0: break; michael@0: default: michael@0: rv = NS_ERROR_ILLEGAL_VALUE; michael@0: break; michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset) michael@0: { michael@0: NS_ENSURE_ARG(charset); michael@0: nsresult rv; michael@0: michael@0: // set up unicode encoder michael@0: nsCOMPtr ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: return ccm->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder)); michael@0: } michael@0: michael@0: nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList) michael@0: { michael@0: NS_ENSURE_ARG(charsetList); michael@0: michael@0: NS_ASSERTION(charsetList[0], "charsetList should not be empty"); michael@0: if (!charsetList[0]) michael@0: return NS_ERROR_INVALID_ARG; michael@0: michael@0: if (mCharsetListIndex >= 0) { michael@0: mCharsetList.Clear(); michael@0: mCharsetListIndex = -1; michael@0: } michael@0: michael@0: nsCWhitespaceTokenizer tokenizer = nsDependentCString(charsetList); michael@0: while (tokenizer.hasMoreTokens()) { michael@0: ParseString(tokenizer.nextToken(), ',', mCharsetList); michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: const char * nsSaveAsCharset::GetNextCharset() michael@0: { michael@0: if ((mCharsetListIndex + 1) >= int32_t(mCharsetList.Length())) michael@0: return nullptr; michael@0: michael@0: // bump the index and return the next charset michael@0: return mCharsetList[++mCharsetListIndex].get(); michael@0: }