intl/unicharutil/src/nsSaveAsCharset.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
michael@0 3 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7
michael@0 8 #include "prmem.h"
michael@0 9 #include "prprf.h"
michael@0 10 #include "nsICharsetConverterManager.h"
michael@0 11 #include "nsSaveAsCharset.h"
michael@0 12 #include "nsWhitespaceTokenizer.h"
michael@0 13 #include "nsServiceManagerUtils.h"
michael@0 14
michael@0 15 //
michael@0 16 // nsISupports methods: supplied for nsSaveAsCharset by the macro invocation
michael@0 17 // below, which names nsISaveAsCharset as the interface involved.
michael@0 18 NS_IMPL_ISUPPORTS(nsSaveAsCharset, nsISaveAsCharset)
michael@0 19
michael@0 20 //
michael@0 21 // nsSaveAsCharset
michael@0 22 //
michael@0 23 nsSaveAsCharset::nsSaveAsCharset()
michael@0 24 {
michael@0 25 mAttribute = attr_htmlTextDefault;
michael@0 26 mEntityVersion = 0;
michael@0 27 mCharsetListIndex = -1;
michael@0 28 }
michael@0 29
michael@0 30 nsSaveAsCharset::~nsSaveAsCharset()
michael@0 31 {
michael@0 32 }
michael@0 33
michael@0 34 NS_IMETHODIMP
michael@0 35 nsSaveAsCharset::Init(const char *charset, uint32_t attr, uint32_t entityVersion)
michael@0 36 {
michael@0 37 nsresult rv = NS_OK;
michael@0 38
michael@0 39 mAttribute = attr;
michael@0 40 mEntityVersion = entityVersion;
michael@0 41
michael@0 42 rv = SetupCharsetList(charset);
michael@0 43 NS_ENSURE_SUCCESS(rv, rv);
michael@0 44
michael@0 45 // set up unicode encoder
michael@0 46 rv = SetupUnicodeEncoder(GetNextCharset());
michael@0 47 NS_ENSURE_SUCCESS(rv, rv);
michael@0 48
michael@0 49 // set up entity converter
michael@0 50 if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter)
michael@0 51 mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv);
michael@0 52
michael@0 53 return rv;
michael@0 54 }
michael@0 55
michael@0 56 NS_IMETHODIMP
michael@0 57 nsSaveAsCharset::Convert(const char16_t *inString, char **_retval)
michael@0 58 {
michael@0 59 NS_ENSURE_ARG_POINTER(_retval);
michael@0 60 NS_ENSURE_ARG_POINTER(inString);
michael@0 61 if (0 == *inString)
michael@0 62 return NS_ERROR_ILLEGAL_VALUE;
michael@0 63 nsresult rv = NS_OK;
michael@0 64
michael@0 65 NS_ASSERTION(mEncoder, "need to call Init() before Convert()");
michael@0 66 NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE);
michael@0 67
michael@0 68 *_retval = nullptr;
michael@0 69
michael@0 70 // make sure to start from the first charset in the list
michael@0 71 if (mCharsetListIndex > 0) {
michael@0 72 mCharsetListIndex = -1;
michael@0 73 rv = SetupUnicodeEncoder(GetNextCharset());
michael@0 74 NS_ENSURE_SUCCESS(rv, rv);
michael@0 75 }
michael@0 76
michael@0 77 do {
michael@0 78 // fallback to the next charset in the list if the last conversion failed by an unmapped character
michael@0 79 if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) {
michael@0 80 const char * charset = GetNextCharset();
michael@0 81 if (!charset)
michael@0 82 break;
michael@0 83 rv = SetupUnicodeEncoder(charset);
michael@0 84 NS_ENSURE_SUCCESS(rv, rv);
michael@0 85 PR_FREEIF(*_retval);
michael@0 86 }
michael@0 87
michael@0 88 if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) {
michael@0 89 NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()");
michael@0 90 NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE);
michael@0 91 char16_t *entity = nullptr;
michael@0 92 // do the entity conversion first
michael@0 93 rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity);
michael@0 94 if(NS_SUCCEEDED(rv)) {
michael@0 95 rv = DoCharsetConversion(entity, _retval);
michael@0 96 nsMemory::Free(entity);
michael@0 97 }
michael@0 98 }
michael@0 99 else
michael@0 100 rv = DoCharsetConversion(inString, _retval);
michael@0 101
michael@0 102 } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv);
michael@0 103
michael@0 104 return rv;
michael@0 105 }
michael@0 106
michael@0 107 NS_IMETHODIMP
michael@0 108 nsSaveAsCharset::GetCharset(char * *aCharset)
michael@0 109 {
michael@0 110 NS_ENSURE_ARG(aCharset);
michael@0 111 NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first");
michael@0 112 NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE);
michael@0 113
michael@0 114 const char* charset = mCharsetList[mCharsetListIndex].get();
michael@0 115 if (!charset) {
michael@0 116 *aCharset = nullptr;
michael@0 117 NS_ASSERTION(charset, "make sure to call Init() with non empty charset list");
michael@0 118 return NS_ERROR_FAILURE;
michael@0 119 }
michael@0 120
michael@0 121 *aCharset = strdup(charset);
michael@0 122 return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
michael@0 123 }
michael@0 124
michael@0 125 /////////////////////////////////////////////////////////////////////////////////////////
michael@0 126
michael@0 127 #define RESERVE_FALLBACK_BYTES 512
michael@0 128
michael@0 129 // do the fallback, reallocate the buffer if necessary
michael@0 130 // need to pass destination buffer info (size, current position and estimation of rest of the conversion)
michael@0 131 NS_IMETHODIMP
michael@0 132 nsSaveAsCharset::HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength,
michael@0 133 int32_t *currentPos, int32_t estimatedLength)
michael@0 134 {
michael@0 135 NS_ENSURE_ARG_POINTER(outString);
michael@0 136 NS_ENSURE_ARG_POINTER(bufferLength);
michael@0 137 NS_ENSURE_ARG_POINTER(currentPos);
michael@0 138
michael@0 139 char fallbackStr[256];
michael@0 140 nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
michael@0 141 if (NS_SUCCEEDED(rv)) {
michael@0 142 int32_t tempLen = (int32_t) strlen(fallbackStr);
michael@0 143
michael@0 144 // reallocate if the buffer is not large enough
michael@0 145 if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) {
michael@0 146 int32_t addLength = tempLen + RESERVE_FALLBACK_BYTES;
michael@0 147 // + 1 is for the terminating NUL, don't add that to bufferLength
michael@0 148 char *temp = (char *) PR_Realloc(*outString, *bufferLength + addLength + 1);
michael@0 149 if (temp) {
michael@0 150 // adjust length/pointer after realloc
michael@0 151 *bufferLength += addLength;
michael@0 152 *outString = temp;
michael@0 153 } else {
michael@0 154 *outString = nullptr;
michael@0 155 *bufferLength = 0;
michael@0 156 return NS_ERROR_OUT_OF_MEMORY;
michael@0 157 }
michael@0 158 }
michael@0 159 memcpy((*outString + *currentPos), fallbackStr, tempLen);
michael@0 160 *currentPos += tempLen;
michael@0 161 }
michael@0 162 return rv;
michael@0 163 }
michael@0 164
michael@0 165 NS_IMETHODIMP
michael@0 166 nsSaveAsCharset::DoCharsetConversion(const char16_t *inString, char **outString)
michael@0 167 {
michael@0 168 NS_ENSURE_ARG_POINTER(outString);
michael@0 169
michael@0 170 *outString = nullptr;
michael@0 171
michael@0 172 nsresult rv;
michael@0 173 int32_t inStringLength = NS_strlen(inString); // original input string length
michael@0 174 int32_t bufferLength; // allocated buffer length
michael@0 175 int32_t srcLength = inStringLength;
michael@0 176 int32_t dstLength;
michael@0 177 int32_t pos1, pos2;
michael@0 178 nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING
michael@0 179
michael@0 180 // estimate and allocate the target buffer (reserve extra memory for fallback)
michael@0 181 rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
michael@0 182 if (NS_FAILED(rv)) return rv;
michael@0 183
michael@0 184 bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback
michael@0 185 // + 1 is for the terminating NUL -- we don't add that to bufferLength so that
michael@0 186 // we can always write dstPtr[pos2] = '\0' even when the encoder filled the
michael@0 187 // buffer.
michael@0 188 char *dstPtr = (char *) PR_Malloc(bufferLength + 1);
michael@0 189 if (!dstPtr) {
michael@0 190 return NS_ERROR_OUT_OF_MEMORY;
michael@0 191 }
michael@0 192
michael@0 193 for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
michael@0 194 // convert from unicode
michael@0 195 dstLength = bufferLength - pos2;
michael@0 196 NS_ASSERTION(dstLength >= 0, "out of bounds write");
michael@0 197 rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);
michael@0 198
michael@0 199 pos1 += srcLength ? srcLength : 1;
michael@0 200 pos2 += dstLength;
michael@0 201 dstPtr[pos2] = '\0';
michael@0 202
michael@0 203 // break: this is usually the case (no error) OR unrecoverable error
michael@0 204 if (NS_ERROR_UENC_NOMAPPING != rv) break;
michael@0 205
michael@0 206 // remember this happened and reset the result
michael@0 207 saveResult = rv;
michael@0 208 rv = NS_OK;
michael@0 209
michael@0 210 // finish encoder, give it a chance to write extra data like escape sequences
michael@0 211 dstLength = bufferLength - pos2;
michael@0 212 rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
michael@0 213 if (NS_SUCCEEDED(rv)) {
michael@0 214 pos2 += dstLength;
michael@0 215 dstPtr[pos2] = '\0';
michael@0 216 }
michael@0 217
michael@0 218 srcLength = inStringLength - pos1;
michael@0 219
michael@0 220 // do the fallback
michael@0 221 if (!ATTR_NO_FALLBACK(mAttribute)) {
michael@0 222 uint32_t unMappedChar;
michael@0 223 if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) &&
michael@0 224 inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
michael@0 225 unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
michael@0 226 pos1++;
michael@0 227 } else {
michael@0 228 unMappedChar = inString[pos1-1];
michael@0 229 }
michael@0 230
michael@0 231 rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
michael@0 232 if (NS_FAILED(rv))
michael@0 233 break;
michael@0 234
michael@0 235 rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
michael@0 236 if (NS_FAILED(rv))
michael@0 237 break;
michael@0 238 dstPtr[pos2] = '\0';
michael@0 239 }
michael@0 240 }
michael@0 241
michael@0 242 if (NS_SUCCEEDED(rv)) {
michael@0 243 // finish encoder, give it a chance to write extra data like escape sequences
michael@0 244 dstLength = bufferLength - pos2;
michael@0 245 rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
michael@0 246 if (NS_SUCCEEDED(rv)) {
michael@0 247 pos2 += dstLength;
michael@0 248 dstPtr[pos2] = '\0';
michael@0 249 }
michael@0 250 }
michael@0 251
michael@0 252 if (NS_FAILED(rv)) {
michael@0 253 PR_FREEIF(dstPtr);
michael@0 254 return rv;
michael@0 255 }
michael@0 256
michael@0 257 *outString = dstPtr; // set the result string
michael@0 258
michael@0 259 // set error code so that the caller can do own fall back
michael@0 260 if (NS_ERROR_UENC_NOMAPPING == saveResult) {
michael@0 261 rv = NS_ERROR_UENC_NOMAPPING;
michael@0 262 }
michael@0 263
michael@0 264 return rv;
michael@0 265 }
michael@0 266
michael@0 267 NS_IMETHODIMP
michael@0 268 nsSaveAsCharset::DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength)
michael@0 269 {
michael@0 270 NS_ENSURE_ARG_POINTER(outString);
michael@0 271
michael@0 272 *outString = '\0';
michael@0 273
michael@0 274 nsresult rv = NS_OK;
michael@0 275
michael@0 276 if (ATTR_NO_FALLBACK(mAttribute)) {
michael@0 277 return NS_OK;
michael@0 278 }
michael@0 279 if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) {
michael@0 280 char *entity = nullptr;
michael@0 281 rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity);
michael@0 282 if (NS_SUCCEEDED(rv)) {
michael@0 283 if (!entity || (int32_t)strlen(entity) > bufferLength) {
michael@0 284 return NS_ERROR_OUT_OF_MEMORY;
michael@0 285 }
michael@0 286 PL_strcpy(outString, entity);
michael@0 287 nsMemory::Free(entity);
michael@0 288 return rv;
michael@0 289 }
michael@0 290 }
michael@0 291
michael@0 292 switch (MASK_FALLBACK(mAttribute)) {
michael@0 293 case attr_FallbackQuestionMark:
michael@0 294 if(bufferLength>=2) {
michael@0 295 *outString++='?';
michael@0 296 *outString='\0';
michael@0 297 rv = NS_OK;
michael@0 298 } else {
michael@0 299 rv = NS_ERROR_FAILURE;
michael@0 300 }
michael@0 301 break;
michael@0 302 case attr_FallbackEscapeU:
michael@0 303 if (inUCS4 & 0xff0000)
michael@0 304 rv = (PR_snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
michael@0 305 else
michael@0 306 rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
michael@0 307 break;
michael@0 308 case attr_FallbackDecimalNCR:
michael@0 309 rv = ( PR_snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
michael@0 310 break;
michael@0 311 case attr_FallbackHexNCR:
michael@0 312 rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
michael@0 313 break;
michael@0 314 case attr_FallbackNone:
michael@0 315 rv = NS_OK;
michael@0 316 break;
michael@0 317 default:
michael@0 318 rv = NS_ERROR_ILLEGAL_VALUE;
michael@0 319 break;
michael@0 320 }
michael@0 321
michael@0 322 return rv;
michael@0 323 }
michael@0 324
michael@0 325 nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset)
michael@0 326 {
michael@0 327 NS_ENSURE_ARG(charset);
michael@0 328 nsresult rv;
michael@0 329
michael@0 330 // set up unicode encoder
michael@0 331 nsCOMPtr <nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
michael@0 332 NS_ENSURE_SUCCESS(rv, rv);
michael@0 333
michael@0 334 return ccm->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder));
michael@0 335 }
michael@0 336
michael@0 337 nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList)
michael@0 338 {
michael@0 339 NS_ENSURE_ARG(charsetList);
michael@0 340
michael@0 341 NS_ASSERTION(charsetList[0], "charsetList should not be empty");
michael@0 342 if (!charsetList[0])
michael@0 343 return NS_ERROR_INVALID_ARG;
michael@0 344
michael@0 345 if (mCharsetListIndex >= 0) {
michael@0 346 mCharsetList.Clear();
michael@0 347 mCharsetListIndex = -1;
michael@0 348 }
michael@0 349
michael@0 350 nsCWhitespaceTokenizer tokenizer = nsDependentCString(charsetList);
michael@0 351 while (tokenizer.hasMoreTokens()) {
michael@0 352 ParseString(tokenizer.nextToken(), ',', mCharsetList);
michael@0 353 }
michael@0 354
michael@0 355 return NS_OK;
michael@0 356 }
michael@0 357
michael@0 358 const char * nsSaveAsCharset::GetNextCharset()
michael@0 359 {
michael@0 360 if ((mCharsetListIndex + 1) >= int32_t(mCharsetList.Length()))
michael@0 361 return nullptr;
michael@0 362
michael@0 363 // bump the index and return the next charset
michael@0 364 return mCharsetList[++mCharsetListIndex].get();
michael@0 365 }

mercurial