Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
michael@0 | 3 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | |
michael@0 | 8 | #include "prmem.h" |
michael@0 | 9 | #include "prprf.h" |
michael@0 | 10 | #include "nsICharsetConverterManager.h" |
michael@0 | 11 | #include "nsSaveAsCharset.h" |
michael@0 | 12 | #include "nsWhitespaceTokenizer.h" |
michael@0 | 13 | #include "nsServiceManagerUtils.h" |
michael@0 | 14 | |
michael@0 | 15 | // |
michael@0 | 16 | // nsISupports methods |
michael@0 | 17 | // |
michael@0 | 18 | NS_IMPL_ISUPPORTS(nsSaveAsCharset, nsISaveAsCharset) |
michael@0 | 19 | |
michael@0 | 20 | // |
michael@0 | 21 | // nsSaveAsCharset |
michael@0 | 22 | // |
michael@0 | 23 | nsSaveAsCharset::nsSaveAsCharset() |
michael@0 | 24 | { |
michael@0 | 25 | mAttribute = attr_htmlTextDefault; |
michael@0 | 26 | mEntityVersion = 0; |
michael@0 | 27 | mCharsetListIndex = -1; |
michael@0 | 28 | } |
michael@0 | 29 | |
michael@0 | 30 | nsSaveAsCharset::~nsSaveAsCharset() |
michael@0 | 31 | { |
michael@0 | 32 | } |
michael@0 | 33 | |
michael@0 | 34 | NS_IMETHODIMP |
michael@0 | 35 | nsSaveAsCharset::Init(const char *charset, uint32_t attr, uint32_t entityVersion) |
michael@0 | 36 | { |
michael@0 | 37 | nsresult rv = NS_OK; |
michael@0 | 38 | |
michael@0 | 39 | mAttribute = attr; |
michael@0 | 40 | mEntityVersion = entityVersion; |
michael@0 | 41 | |
michael@0 | 42 | rv = SetupCharsetList(charset); |
michael@0 | 43 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 44 | |
michael@0 | 45 | // set up unicode encoder |
michael@0 | 46 | rv = SetupUnicodeEncoder(GetNextCharset()); |
michael@0 | 47 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 48 | |
michael@0 | 49 | // set up entity converter |
michael@0 | 50 | if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter) |
michael@0 | 51 | mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv); |
michael@0 | 52 | |
michael@0 | 53 | return rv; |
michael@0 | 54 | } |
michael@0 | 55 | |
michael@0 | 56 | NS_IMETHODIMP |
michael@0 | 57 | nsSaveAsCharset::Convert(const char16_t *inString, char **_retval) |
michael@0 | 58 | { |
michael@0 | 59 | NS_ENSURE_ARG_POINTER(_retval); |
michael@0 | 60 | NS_ENSURE_ARG_POINTER(inString); |
michael@0 | 61 | if (0 == *inString) |
michael@0 | 62 | return NS_ERROR_ILLEGAL_VALUE; |
michael@0 | 63 | nsresult rv = NS_OK; |
michael@0 | 64 | |
michael@0 | 65 | NS_ASSERTION(mEncoder, "need to call Init() before Convert()"); |
michael@0 | 66 | NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE); |
michael@0 | 67 | |
michael@0 | 68 | *_retval = nullptr; |
michael@0 | 69 | |
michael@0 | 70 | // make sure to start from the first charset in the list |
michael@0 | 71 | if (mCharsetListIndex > 0) { |
michael@0 | 72 | mCharsetListIndex = -1; |
michael@0 | 73 | rv = SetupUnicodeEncoder(GetNextCharset()); |
michael@0 | 74 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 75 | } |
michael@0 | 76 | |
michael@0 | 77 | do { |
michael@0 | 78 | // fallback to the next charset in the list if the last conversion failed by an unmapped character |
michael@0 | 79 | if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) { |
michael@0 | 80 | const char * charset = GetNextCharset(); |
michael@0 | 81 | if (!charset) |
michael@0 | 82 | break; |
michael@0 | 83 | rv = SetupUnicodeEncoder(charset); |
michael@0 | 84 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 85 | PR_FREEIF(*_retval); |
michael@0 | 86 | } |
michael@0 | 87 | |
michael@0 | 88 | if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) { |
michael@0 | 89 | NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()"); |
michael@0 | 90 | NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE); |
michael@0 | 91 | char16_t *entity = nullptr; |
michael@0 | 92 | // do the entity conversion first |
michael@0 | 93 | rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity); |
michael@0 | 94 | if(NS_SUCCEEDED(rv)) { |
michael@0 | 95 | rv = DoCharsetConversion(entity, _retval); |
michael@0 | 96 | nsMemory::Free(entity); |
michael@0 | 97 | } |
michael@0 | 98 | } |
michael@0 | 99 | else |
michael@0 | 100 | rv = DoCharsetConversion(inString, _retval); |
michael@0 | 101 | |
michael@0 | 102 | } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv); |
michael@0 | 103 | |
michael@0 | 104 | return rv; |
michael@0 | 105 | } |
michael@0 | 106 | |
michael@0 | 107 | NS_IMETHODIMP |
michael@0 | 108 | nsSaveAsCharset::GetCharset(char * *aCharset) |
michael@0 | 109 | { |
michael@0 | 110 | NS_ENSURE_ARG(aCharset); |
michael@0 | 111 | NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first"); |
michael@0 | 112 | NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE); |
michael@0 | 113 | |
michael@0 | 114 | const char* charset = mCharsetList[mCharsetListIndex].get(); |
michael@0 | 115 | if (!charset) { |
michael@0 | 116 | *aCharset = nullptr; |
michael@0 | 117 | NS_ASSERTION(charset, "make sure to call Init() with non empty charset list"); |
michael@0 | 118 | return NS_ERROR_FAILURE; |
michael@0 | 119 | } |
michael@0 | 120 | |
michael@0 | 121 | *aCharset = strdup(charset); |
michael@0 | 122 | return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 123 | } |
michael@0 | 124 | |
michael@0 | 125 | ///////////////////////////////////////////////////////////////////////////////////////// |
michael@0 | 126 | |
michael@0 | 127 | #define RESERVE_FALLBACK_BYTES 512 |
michael@0 | 128 | |
michael@0 | 129 | // do the fallback, reallocate the buffer if necessary |
michael@0 | 130 | // need to pass destination buffer info (size, current position and estimation of rest of the conversion) |
michael@0 | 131 | NS_IMETHODIMP |
michael@0 | 132 | nsSaveAsCharset::HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength, |
michael@0 | 133 | int32_t *currentPos, int32_t estimatedLength) |
michael@0 | 134 | { |
michael@0 | 135 | NS_ENSURE_ARG_POINTER(outString); |
michael@0 | 136 | NS_ENSURE_ARG_POINTER(bufferLength); |
michael@0 | 137 | NS_ENSURE_ARG_POINTER(currentPos); |
michael@0 | 138 | |
michael@0 | 139 | char fallbackStr[256]; |
michael@0 | 140 | nsresult rv = DoConversionFallBack(character, fallbackStr, 256); |
michael@0 | 141 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 142 | int32_t tempLen = (int32_t) strlen(fallbackStr); |
michael@0 | 143 | |
michael@0 | 144 | // reallocate if the buffer is not large enough |
michael@0 | 145 | if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) { |
michael@0 | 146 | int32_t addLength = tempLen + RESERVE_FALLBACK_BYTES; |
michael@0 | 147 | // + 1 is for the terminating NUL, don't add that to bufferLength |
michael@0 | 148 | char *temp = (char *) PR_Realloc(*outString, *bufferLength + addLength + 1); |
michael@0 | 149 | if (temp) { |
michael@0 | 150 | // adjust length/pointer after realloc |
michael@0 | 151 | *bufferLength += addLength; |
michael@0 | 152 | *outString = temp; |
michael@0 | 153 | } else { |
michael@0 | 154 | *outString = nullptr; |
michael@0 | 155 | *bufferLength = 0; |
michael@0 | 156 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 157 | } |
michael@0 | 158 | } |
michael@0 | 159 | memcpy((*outString + *currentPos), fallbackStr, tempLen); |
michael@0 | 160 | *currentPos += tempLen; |
michael@0 | 161 | } |
michael@0 | 162 | return rv; |
michael@0 | 163 | } |
michael@0 | 164 | |
michael@0 | 165 | NS_IMETHODIMP |
michael@0 | 166 | nsSaveAsCharset::DoCharsetConversion(const char16_t *inString, char **outString) |
michael@0 | 167 | { |
michael@0 | 168 | NS_ENSURE_ARG_POINTER(outString); |
michael@0 | 169 | |
michael@0 | 170 | *outString = nullptr; |
michael@0 | 171 | |
michael@0 | 172 | nsresult rv; |
michael@0 | 173 | int32_t inStringLength = NS_strlen(inString); // original input string length |
michael@0 | 174 | int32_t bufferLength; // allocated buffer length |
michael@0 | 175 | int32_t srcLength = inStringLength; |
michael@0 | 176 | int32_t dstLength; |
michael@0 | 177 | int32_t pos1, pos2; |
michael@0 | 178 | nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING |
michael@0 | 179 | |
michael@0 | 180 | // estimate and allocate the target buffer (reserve extra memory for fallback) |
michael@0 | 181 | rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength); |
michael@0 | 182 | if (NS_FAILED(rv)) return rv; |
michael@0 | 183 | |
michael@0 | 184 | bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback |
michael@0 | 185 | // + 1 is for the terminating NUL -- we don't add that to bufferLength so that |
michael@0 | 186 | // we can always write dstPtr[pos2] = '\0' even when the encoder filled the |
michael@0 | 187 | // buffer. |
michael@0 | 188 | char *dstPtr = (char *) PR_Malloc(bufferLength + 1); |
michael@0 | 189 | if (!dstPtr) { |
michael@0 | 190 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 191 | } |
michael@0 | 192 | |
michael@0 | 193 | for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) { |
michael@0 | 194 | // convert from unicode |
michael@0 | 195 | dstLength = bufferLength - pos2; |
michael@0 | 196 | NS_ASSERTION(dstLength >= 0, "out of bounds write"); |
michael@0 | 197 | rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength); |
michael@0 | 198 | |
michael@0 | 199 | pos1 += srcLength ? srcLength : 1; |
michael@0 | 200 | pos2 += dstLength; |
michael@0 | 201 | dstPtr[pos2] = '\0'; |
michael@0 | 202 | |
michael@0 | 203 | // break: this is usually the case (no error) OR unrecoverable error |
michael@0 | 204 | if (NS_ERROR_UENC_NOMAPPING != rv) break; |
michael@0 | 205 | |
michael@0 | 206 | // remember this happened and reset the result |
michael@0 | 207 | saveResult = rv; |
michael@0 | 208 | rv = NS_OK; |
michael@0 | 209 | |
michael@0 | 210 | // finish encoder, give it a chance to write extra data like escape sequences |
michael@0 | 211 | dstLength = bufferLength - pos2; |
michael@0 | 212 | rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); |
michael@0 | 213 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 214 | pos2 += dstLength; |
michael@0 | 215 | dstPtr[pos2] = '\0'; |
michael@0 | 216 | } |
michael@0 | 217 | |
michael@0 | 218 | srcLength = inStringLength - pos1; |
michael@0 | 219 | |
michael@0 | 220 | // do the fallback |
michael@0 | 221 | if (!ATTR_NO_FALLBACK(mAttribute)) { |
michael@0 | 222 | uint32_t unMappedChar; |
michael@0 | 223 | if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && |
michael@0 | 224 | inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) { |
michael@0 | 225 | unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]); |
michael@0 | 226 | pos1++; |
michael@0 | 227 | } else { |
michael@0 | 228 | unMappedChar = inString[pos1-1]; |
michael@0 | 229 | } |
michael@0 | 230 | |
michael@0 | 231 | rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength); |
michael@0 | 232 | if (NS_FAILED(rv)) |
michael@0 | 233 | break; |
michael@0 | 234 | |
michael@0 | 235 | rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength); |
michael@0 | 236 | if (NS_FAILED(rv)) |
michael@0 | 237 | break; |
michael@0 | 238 | dstPtr[pos2] = '\0'; |
michael@0 | 239 | } |
michael@0 | 240 | } |
michael@0 | 241 | |
michael@0 | 242 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 243 | // finish encoder, give it a chance to write extra data like escape sequences |
michael@0 | 244 | dstLength = bufferLength - pos2; |
michael@0 | 245 | rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); |
michael@0 | 246 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 247 | pos2 += dstLength; |
michael@0 | 248 | dstPtr[pos2] = '\0'; |
michael@0 | 249 | } |
michael@0 | 250 | } |
michael@0 | 251 | |
michael@0 | 252 | if (NS_FAILED(rv)) { |
michael@0 | 253 | PR_FREEIF(dstPtr); |
michael@0 | 254 | return rv; |
michael@0 | 255 | } |
michael@0 | 256 | |
michael@0 | 257 | *outString = dstPtr; // set the result string |
michael@0 | 258 | |
michael@0 | 259 | // set error code so that the caller can do own fall back |
michael@0 | 260 | if (NS_ERROR_UENC_NOMAPPING == saveResult) { |
michael@0 | 261 | rv = NS_ERROR_UENC_NOMAPPING; |
michael@0 | 262 | } |
michael@0 | 263 | |
michael@0 | 264 | return rv; |
michael@0 | 265 | } |
michael@0 | 266 | |
michael@0 | 267 | NS_IMETHODIMP |
michael@0 | 268 | nsSaveAsCharset::DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength) |
michael@0 | 269 | { |
michael@0 | 270 | NS_ENSURE_ARG_POINTER(outString); |
michael@0 | 271 | |
michael@0 | 272 | *outString = '\0'; |
michael@0 | 273 | |
michael@0 | 274 | nsresult rv = NS_OK; |
michael@0 | 275 | |
michael@0 | 276 | if (ATTR_NO_FALLBACK(mAttribute)) { |
michael@0 | 277 | return NS_OK; |
michael@0 | 278 | } |
michael@0 | 279 | if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) { |
michael@0 | 280 | char *entity = nullptr; |
michael@0 | 281 | rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity); |
michael@0 | 282 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 283 | if (!entity || (int32_t)strlen(entity) > bufferLength) { |
michael@0 | 284 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 285 | } |
michael@0 | 286 | PL_strcpy(outString, entity); |
michael@0 | 287 | nsMemory::Free(entity); |
michael@0 | 288 | return rv; |
michael@0 | 289 | } |
michael@0 | 290 | } |
michael@0 | 291 | |
michael@0 | 292 | switch (MASK_FALLBACK(mAttribute)) { |
michael@0 | 293 | case attr_FallbackQuestionMark: |
michael@0 | 294 | if(bufferLength>=2) { |
michael@0 | 295 | *outString++='?'; |
michael@0 | 296 | *outString='\0'; |
michael@0 | 297 | rv = NS_OK; |
michael@0 | 298 | } else { |
michael@0 | 299 | rv = NS_ERROR_FAILURE; |
michael@0 | 300 | } |
michael@0 | 301 | break; |
michael@0 | 302 | case attr_FallbackEscapeU: |
michael@0 | 303 | if (inUCS4 & 0xff0000) |
michael@0 | 304 | rv = (PR_snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; |
michael@0 | 305 | else |
michael@0 | 306 | rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; |
michael@0 | 307 | break; |
michael@0 | 308 | case attr_FallbackDecimalNCR: |
michael@0 | 309 | rv = ( PR_snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; |
michael@0 | 310 | break; |
michael@0 | 311 | case attr_FallbackHexNCR: |
michael@0 | 312 | rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; |
michael@0 | 313 | break; |
michael@0 | 314 | case attr_FallbackNone: |
michael@0 | 315 | rv = NS_OK; |
michael@0 | 316 | break; |
michael@0 | 317 | default: |
michael@0 | 318 | rv = NS_ERROR_ILLEGAL_VALUE; |
michael@0 | 319 | break; |
michael@0 | 320 | } |
michael@0 | 321 | |
michael@0 | 322 | return rv; |
michael@0 | 323 | } |
michael@0 | 324 | |
michael@0 | 325 | nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset) |
michael@0 | 326 | { |
michael@0 | 327 | NS_ENSURE_ARG(charset); |
michael@0 | 328 | nsresult rv; |
michael@0 | 329 | |
michael@0 | 330 | // set up unicode encoder |
michael@0 | 331 | nsCOMPtr <nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); |
michael@0 | 332 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 333 | |
michael@0 | 334 | return ccm->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder)); |
michael@0 | 335 | } |
michael@0 | 336 | |
michael@0 | 337 | nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList) |
michael@0 | 338 | { |
michael@0 | 339 | NS_ENSURE_ARG(charsetList); |
michael@0 | 340 | |
michael@0 | 341 | NS_ASSERTION(charsetList[0], "charsetList should not be empty"); |
michael@0 | 342 | if (!charsetList[0]) |
michael@0 | 343 | return NS_ERROR_INVALID_ARG; |
michael@0 | 344 | |
michael@0 | 345 | if (mCharsetListIndex >= 0) { |
michael@0 | 346 | mCharsetList.Clear(); |
michael@0 | 347 | mCharsetListIndex = -1; |
michael@0 | 348 | } |
michael@0 | 349 | |
michael@0 | 350 | nsCWhitespaceTokenizer tokenizer = nsDependentCString(charsetList); |
michael@0 | 351 | while (tokenizer.hasMoreTokens()) { |
michael@0 | 352 | ParseString(tokenizer.nextToken(), ',', mCharsetList); |
michael@0 | 353 | } |
michael@0 | 354 | |
michael@0 | 355 | return NS_OK; |
michael@0 | 356 | } |
michael@0 | 357 | |
michael@0 | 358 | const char * nsSaveAsCharset::GetNextCharset() |
michael@0 | 359 | { |
michael@0 | 360 | if ((mCharsetListIndex + 1) >= int32_t(mCharsetList.Length())) |
michael@0 | 361 | return nullptr; |
michael@0 | 362 | |
michael@0 | 363 | // bump the index and return the next charset |
michael@0 | 364 | return mCharsetList[++mCharsetListIndex].get(); |
michael@0 | 365 | } |