intl/unicharutil/src/nsSaveAsCharset.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     8 #include "prmem.h"
     9 #include "prprf.h"
    10 #include "nsICharsetConverterManager.h"
    11 #include "nsSaveAsCharset.h"
    12 #include "nsWhitespaceTokenizer.h"
    13 #include "nsServiceManagerUtils.h"
    15 //
    16 // nsISupports methods
    17 //
// Supplies the nsISupports implementation for nsSaveAsCharset and names
// nsISaveAsCharset as the interface it exposes.
// NOTE(review): presumably this expands to AddRef/Release/QueryInterface --
// confirm against the macro's definition.
NS_IMPL_ISUPPORTS(nsSaveAsCharset, nsISaveAsCharset)
    20 //
    21 // nsSaveAsCharset
    22 //
    23 nsSaveAsCharset::nsSaveAsCharset()
    24 {
    25   mAttribute = attr_htmlTextDefault;
    26   mEntityVersion = 0;
    27   mCharsetListIndex = -1;
    28 }
    30 nsSaveAsCharset::~nsSaveAsCharset()
    31 {
    32 }
    34 NS_IMETHODIMP
    35 nsSaveAsCharset::Init(const char *charset, uint32_t attr, uint32_t entityVersion)
    36 {
    37   nsresult rv = NS_OK;
    39   mAttribute = attr;
    40   mEntityVersion = entityVersion;
    42   rv = SetupCharsetList(charset);
    43   NS_ENSURE_SUCCESS(rv, rv);
    45   // set up unicode encoder
    46   rv = SetupUnicodeEncoder(GetNextCharset());
    47   NS_ENSURE_SUCCESS(rv, rv);
    49   // set up entity converter
    50   if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter)
    51     mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv);
    53   return rv;
    54 }
    56 NS_IMETHODIMP
    57 nsSaveAsCharset::Convert(const char16_t *inString, char **_retval)
    58 {
    59   NS_ENSURE_ARG_POINTER(_retval);
    60   NS_ENSURE_ARG_POINTER(inString);
    61   if (0 == *inString)
    62     return NS_ERROR_ILLEGAL_VALUE;
    63   nsresult rv = NS_OK;
    65   NS_ASSERTION(mEncoder, "need to call Init() before Convert()");
    66   NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE);
    68   *_retval = nullptr;
    70   // make sure to start from the first charset in the list
    71   if (mCharsetListIndex > 0) {
    72     mCharsetListIndex = -1;
    73     rv = SetupUnicodeEncoder(GetNextCharset());
    74     NS_ENSURE_SUCCESS(rv, rv);
    75   }
    77   do {
    78     // fallback to the next charset in the list if the last conversion failed by an unmapped character
    79     if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) {
    80       const char * charset = GetNextCharset();
    81       if (!charset)
    82         break;
    83       rv = SetupUnicodeEncoder(charset);
    84       NS_ENSURE_SUCCESS(rv, rv);
    85       PR_FREEIF(*_retval);
    86     }
    88     if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) {
    89       NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()");
    90       NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE);
    91       char16_t *entity = nullptr;
    92       // do the entity conversion first
    93       rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity);
    94       if(NS_SUCCEEDED(rv)) {
    95         rv = DoCharsetConversion(entity, _retval);
    96         nsMemory::Free(entity);
    97       }
    98     }
    99     else
   100       rv = DoCharsetConversion(inString, _retval);
   102   } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv);
   104   return rv;
   105 }
   107 NS_IMETHODIMP 
   108 nsSaveAsCharset::GetCharset(char * *aCharset)
   109 {
   110   NS_ENSURE_ARG(aCharset);
   111   NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first");
   112   NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE);
   114   const char* charset = mCharsetList[mCharsetListIndex].get();
   115   if (!charset) {
   116     *aCharset = nullptr;
   117     NS_ASSERTION(charset, "make sure to call Init() with non empty charset list");
   118     return NS_ERROR_FAILURE;
   119   }
   121   *aCharset = strdup(charset);
   122   return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
   123 }
   125 /////////////////////////////////////////////////////////////////////////////////////////
   127 #define RESERVE_FALLBACK_BYTES 512
   129 // do the fallback, reallocate the buffer if necessary
   130 // need to pass destination buffer info (size, current position and estimation of rest of the conversion)
   131 NS_IMETHODIMP
   132 nsSaveAsCharset::HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength, 
   133                                 int32_t *currentPos, int32_t estimatedLength)
   134 {
   135   NS_ENSURE_ARG_POINTER(outString);
   136   NS_ENSURE_ARG_POINTER(bufferLength);
   137   NS_ENSURE_ARG_POINTER(currentPos);
   139   char fallbackStr[256];
   140   nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
   141   if (NS_SUCCEEDED(rv)) {
   142     int32_t tempLen = (int32_t) strlen(fallbackStr);
   144     // reallocate if the buffer is not large enough
   145     if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) {
   146       int32_t addLength = tempLen + RESERVE_FALLBACK_BYTES;
   147       // + 1 is for the terminating NUL, don't add that to bufferLength
   148       char *temp = (char *) PR_Realloc(*outString, *bufferLength + addLength + 1);
   149       if (temp) {
   150         // adjust length/pointer after realloc
   151         *bufferLength += addLength;
   152         *outString = temp;
   153       } else {
   154         *outString = nullptr;
   155         *bufferLength = 0;
   156         return NS_ERROR_OUT_OF_MEMORY;
   157       }
   158     }
   159     memcpy((*outString + *currentPos), fallbackStr, tempLen);
   160     *currentPos += tempLen;
   161   }
   162   return rv;
   163 }
   165 NS_IMETHODIMP
   166 nsSaveAsCharset::DoCharsetConversion(const char16_t *inString, char **outString)
   167 {
   168   NS_ENSURE_ARG_POINTER(outString);
   170   *outString = nullptr;
   172   nsresult rv;
   173   int32_t inStringLength = NS_strlen(inString);       // original input string length
   174   int32_t bufferLength;                               // allocated buffer length
   175   int32_t srcLength = inStringLength;
   176   int32_t dstLength;
   177   int32_t pos1, pos2;
   178   nsresult saveResult = NS_OK;                         // to remember NS_ERROR_UENC_NOMAPPING
   180   // estimate and allocate the target buffer (reserve extra memory for fallback)
   181   rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
   182   if (NS_FAILED(rv)) return rv;
   184   bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback
   185   // + 1 is for the terminating NUL -- we don't add that to bufferLength so that
   186   // we can always write dstPtr[pos2] = '\0' even when the encoder filled the
   187   // buffer.
   188   char *dstPtr = (char *) PR_Malloc(bufferLength + 1);
   189   if (!dstPtr) {
   190     return NS_ERROR_OUT_OF_MEMORY;
   191   }
   193   for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
   194     // convert from unicode
   195     dstLength = bufferLength - pos2;
   196     NS_ASSERTION(dstLength >= 0, "out of bounds write");
   197     rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);
   199     pos1 += srcLength ? srcLength : 1;
   200     pos2 += dstLength;
   201     dstPtr[pos2] = '\0';
   203     // break: this is usually the case (no error) OR unrecoverable error
   204     if (NS_ERROR_UENC_NOMAPPING != rv) break;
   206     // remember this happened and reset the result
   207     saveResult = rv;
   208     rv = NS_OK;
   210     // finish encoder, give it a chance to write extra data like escape sequences
   211     dstLength = bufferLength - pos2;
   212     rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
   213     if (NS_SUCCEEDED(rv)) {
   214       pos2 += dstLength;
   215       dstPtr[pos2] = '\0';
   216     }
   218     srcLength = inStringLength - pos1;
   220     // do the fallback
   221     if (!ATTR_NO_FALLBACK(mAttribute)) {
   222       uint32_t unMappedChar;
   223       if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && 
   224           inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
   225         unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
   226         pos1++;
   227       } else {
   228         unMappedChar = inString[pos1-1];
   229       }
   231       rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
   232       if (NS_FAILED(rv)) 
   233         break;
   235       rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
   236       if (NS_FAILED(rv)) 
   237         break;
   238       dstPtr[pos2] = '\0';
   239     }
   240   }
   242   if (NS_SUCCEEDED(rv)) {
   243     // finish encoder, give it a chance to write extra data like escape sequences
   244     dstLength = bufferLength - pos2;
   245     rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
   246     if (NS_SUCCEEDED(rv)) {
   247       pos2 += dstLength;
   248       dstPtr[pos2] = '\0';
   249     }
   250   }
   252   if (NS_FAILED(rv)) {
   253     PR_FREEIF(dstPtr);
   254     return rv;
   255   }
   257   *outString = dstPtr;      // set the result string
   259   // set error code so that the caller can do own fall back
   260   if (NS_ERROR_UENC_NOMAPPING == saveResult) {
   261     rv = NS_ERROR_UENC_NOMAPPING;
   262   }
   264   return rv;
   265 }
   267 NS_IMETHODIMP
   268 nsSaveAsCharset::DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength)
   269 {
   270   NS_ENSURE_ARG_POINTER(outString);
   272   *outString = '\0';
   274   nsresult rv = NS_OK;
   276   if (ATTR_NO_FALLBACK(mAttribute)) {
   277     return NS_OK;
   278   }
   279   if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) {
   280     char *entity = nullptr;
   281     rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity);
   282     if (NS_SUCCEEDED(rv)) {
   283       if (!entity || (int32_t)strlen(entity) > bufferLength) {
   284         return NS_ERROR_OUT_OF_MEMORY;
   285       }
   286       PL_strcpy(outString, entity);
   287       nsMemory::Free(entity);
   288       return rv;
   289     }
   290   }
   292   switch (MASK_FALLBACK(mAttribute)) {
   293   case attr_FallbackQuestionMark:
   294     if(bufferLength>=2) {
   295       *outString++='?';
   296       *outString='\0';
   297       rv = NS_OK;
   298     } else {
   299       rv = NS_ERROR_FAILURE;
   300     }
   301     break;
   302   case attr_FallbackEscapeU:
   303     if (inUCS4 & 0xff0000)
   304       rv = (PR_snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
   305     else
   306       rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
   307     break;
   308   case attr_FallbackDecimalNCR:
   309     rv = ( PR_snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
   310     break;
   311   case attr_FallbackHexNCR:
   312     rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
   313     break;
   314   case attr_FallbackNone:
   315     rv = NS_OK;
   316     break;
   317   default:
   318     rv = NS_ERROR_ILLEGAL_VALUE;
   319     break;
   320   }
   322 	return rv;
   323 }
   325 nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset)
   326 {
   327   NS_ENSURE_ARG(charset);
   328   nsresult rv;
   330   // set up unicode encoder
   331   nsCOMPtr <nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
   332   NS_ENSURE_SUCCESS(rv, rv);
   334   return ccm->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder));
   335 }
   337 nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList)
   338 {
   339   NS_ENSURE_ARG(charsetList);
   341   NS_ASSERTION(charsetList[0], "charsetList should not be empty");
   342   if (!charsetList[0])
   343     return NS_ERROR_INVALID_ARG;
   345   if (mCharsetListIndex >= 0) {
   346     mCharsetList.Clear();
   347     mCharsetListIndex = -1;
   348   }
   350   nsCWhitespaceTokenizer tokenizer = nsDependentCString(charsetList);
   351   while (tokenizer.hasMoreTokens()) {
   352     ParseString(tokenizer.nextToken(), ',', mCharsetList);
   353   }
   355   return NS_OK;
   356 }
   358 const char * nsSaveAsCharset::GetNextCharset()
   359 {
   360   if ((mCharsetListIndex + 1) >= int32_t(mCharsetList.Length()))
   361     return nullptr;
   363   // bump the index and return the next charset
   364   return mCharsetList[++mCharsetListIndex].get();
   365 }

mercurial