The Tor Browser: comparison intl/icu/source/common/unistr

--1:000000000000
+:6f8aae642d11
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  unistr_cnv.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:2
+*
+*   created on: 2004aug19
+*   created by: Markus W. Scherer
+*
+*   Character conversion functions moved here from unistr.cpp
+*/
+#include "unicode/utypes.h"
+#if !UCONFIG_NO_CONVERSION
+#include "unicode/putil.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "unicode/ustring.h"
+#include "unicode/unistr.h"
+#include "unicode/ucnv.h"
+#include "ucnv_imp.h"
+#include "putilimp.h"
+#include "ustr_cnv.h"
+#include "ustr_imp.h"
+U_NAMESPACE_BEGIN
+//========================================
+// Constructors
+//========================================
+#if !U_CHARSET_IS_UTF8
+// Builds a string from NUL-terminated bytes.
+// The conversion is delegated to doCodepageCreate() with a null codepage
+// name; a null codepageData pointer leaves the string empty.
+UnicodeString::UnicodeString(const char *codepageData)
+  : fShortLength(0),
+    fFlags(kShortString)
+{
+  if(codepageData == 0) {
+    // nothing to convert: stay an empty short string
+    return;
+  }
+  const int32_t byteCount = (int32_t)uprv_strlen(codepageData);
+  doCodepageCreate(codepageData, byteCount, 0);
+}
+// Builds a string from dataLength bytes at codepageData.
+// The conversion is delegated to doCodepageCreate() with a null codepage
+// name; a null codepageData pointer leaves the string empty.
+UnicodeString::UnicodeString(const char *codepageData,
+                             int32_t dataLength)
+  : fShortLength(0),
+    fFlags(kShortString)
+{
+  if(codepageData == 0) {
+    // nothing to convert: stay an empty short string
+    return;
+  }
+  doCodepageCreate(codepageData, dataLength, 0);
+}
+// else see unistr.cpp
+#endif
+// Builds a string from NUL-terminated bytes in the named codepage.
+// The codepage name is handed unchanged to doCodepageCreate();
+// a null codepageData pointer leaves the string empty.
+UnicodeString::UnicodeString(const char *codepageData,
+                             const char *codepage)
+  : fShortLength(0),
+    fFlags(kShortString)
+{
+  if(codepageData == 0) {
+    // nothing to convert: stay an empty short string
+    return;
+  }
+  const int32_t byteCount = (int32_t)uprv_strlen(codepageData);
+  doCodepageCreate(codepageData, byteCount, codepage);
+}
+// Builds a string from dataLength bytes in the named codepage.
+// Both the length and the codepage name are handed unchanged to
+// doCodepageCreate(); a null codepageData pointer leaves the string empty.
+UnicodeString::UnicodeString(const char *codepageData,
+                             int32_t dataLength,
+                             const char *codepage)
+  : fShortLength(0),
+    fFlags(kShortString)
+{
+  if(codepageData == 0) {
+    // nothing to convert: stay an empty short string
+    return;
+  }
+  doCodepageCreate(codepageData, dataLength, codepage);
+}
+// Builds a string by converting srcLength bytes at src with cnv.
+//   src        input bytes; NULL is treated as an empty string
+//   srcLength  byte count, or -1 to take uprv_strlen(src); values below -1
+//              set U_ILLEGAL_ARGUMENT_ERROR
+//   cnv        converter to use (reset for to-Unicode conversion first);
+//              0 borrows the default converter for the duration of the call
+//   errorCode  in/out status; nothing is done if it already holds a failure,
+//              and the string is set to bogus if this constructor fails
+UnicodeString::UnicodeString(const char *src, int32_t srcLength,
+                             UConverter *cnv,
+                             UErrorCode &errorCode)
+  : fShortLength(0),
+    fFlags(kShortString)
+{
+  if(U_FAILURE(errorCode)) {
+    // a failure on entry is left untouched and the string stays empty
+    return;
+  }
+  if(src==NULL) {
+    // treat as an empty string, do nothing more
+    return;
+  }
+  if(srcLength<-1) {
+    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    setToBogus();
+    return;
+  }
+  // resolve the "NUL-terminated" marker into a real byte count
+  const int32_t byteCount=
+    (srcLength==-1) ? (int32_t)uprv_strlen(src) : srcLength;
+  if(byteCount>0) {
+    const UBool borrowDefault=(UBool)(cnv==0);
+    if(borrowDefault) {
+      cnv=u_getDefaultConverter(&errorCode);
+    } else {
+      ucnv_resetToUnicode(cnv);
+    }
+    doCodepageCreate(src, byteCount, cnv, errorCode);
+    if(borrowDefault) {
+      // hand the shared default converter back
+      u_releaseDefaultConverter(cnv);
+    }
+  }
+  if(U_FAILURE(errorCode)) {
+    setToBogus();
+  }
+}
+//========================================
+// Codeset conversion
+//========================================
+#if !U_CHARSET_IS_UTF8
+// Extracts a substring into target without naming a codepage.
+// Forwards to the five-argument extract() with a null codepage name and
+// returns whatever that overload returns.
+int32_t
+UnicodeString::extract(int32_t start,
+                       int32_t length,
+                       char *target,
+                       uint32_t dstSize) const {
+  const char *const noCodepage = 0;
+  return extract(start, length, target, dstSize, noCodepage);
+}
+// else see unistr.cpp
+#endif
+// Extracts the substring [start, start+length) into target as bytes.
+//   start, length  substring bounds; pinned to legal values
+//   target         output buffer; may be 0 only when dstSize is 0
+//   dstSize        buffer capacity; values of 0x7fffffff and above are
+//                  pinned so that target+capacity cannot wrap around
+//   codepage       0 selects the default converter (or the UTF-8 fast path
+//                  when the default name is UTF-8), "" selects the
+//                  invariant-character conversion, anything else is opened
+//                  with ucnv_open()
+// Returns 0 for illegal arguments, otherwise the value produced by the
+// selected conversion path.
+int32_t
+UnicodeString::extract(int32_t start,
+                       int32_t length,
+                       char *target,
+                       uint32_t dstSize,
+                       const char *codepage) const
+{
+  // a non-empty buffer needs a real pointer
+  if(dstSize > 0 && target == 0) {
+    return 0;
+  }
+  pinIndices(start, length);
+  // The API takes uint32_t, but everything below works with int32_t.
+  // Very large sizes (including the "unlimited" 0xffffffff) are replaced by
+  // the distance to U_MAX_PTR(target) so a limit pointer never wraps.
+  int32_t capacity;
+  if(dstSize >= 0x7fffffff) {
+    char *targetLimit = (char *)U_MAX_PTR(target);
+    capacity = (int32_t)(targetLimit - target);
+  } else {
+    capacity = (int32_t)dstSize;
+  }
+  UErrorCode status = U_ZERO_ERROR;
+  // an empty substring only needs the terminator
+  if(length == 0) {
+    return u_terminateChars(target, capacity, 0, &status);
+  }
+  // empty codepage name: "invariant characters" conversion, no converter
+  if(codepage != 0 && *codepage == 0) {
+    const int32_t destLength = (length <= capacity) ? length : capacity;
+    u_UCharsToChars(getArrayStart() + start, target, destLength);
+    return u_terminateChars(target, capacity, length, &status);
+  }
+  const UBool borrowDefault = (UBool)(codepage == 0);
+  UConverter *converter;
+  if(borrowDefault) {
+    const char *defaultName = ucnv_getDefaultName();
+    if(UCNV_FAST_IS_UTF8(defaultName)) {
+      return toUTF8(start, length, target, capacity);
+    }
+    // use the cached default converter
+    converter = u_getDefaultConverter(&status);
+  } else {
+    converter = ucnv_open(codepage, &status);
+  }
+  const int32_t result =
+    doExtract(start, length, target, capacity, converter, status);
+  // give the converter back the same way it was obtained
+  if(borrowDefault) {
+    u_releaseDefaultConverter(converter);
+  } else {
+    ucnv_close(converter);
+  }
+  return result;
+}
+// Converts the whole string into dest using cnv.
+//   dest, destCapacity  output buffer; dest may be 0 only if destCapacity is 0
+//   cnv                 converter to use (reset for from-Unicode conversion
+//                       first); 0 borrows the default converter
+//   errorCode           in/out status; a failure on entry returns 0 at once
+// A bogus string or an inconsistent buffer description sets
+// U_ILLEGAL_ARGUMENT_ERROR and returns 0. Otherwise the result of
+// doExtract() (or of u_terminateChars() for an empty string) is returned.
+int32_t
+UnicodeString::extract(char *dest, int32_t destCapacity,
+                       UConverter *cnv,
+                       UErrorCode &errorCode) const
+{
+  if(U_FAILURE(errorCode)) {
+    return 0;
+  }
+  const UBool badArguments=
+    (UBool)(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0));
+  if(badArguments) {
+    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    return 0;
+  }
+  // an empty string only needs the terminator
+  if(isEmpty()) {
+    return u_terminateChars(dest, destCapacity, 0, &errorCode);
+  }
+  const UBool borrowDefault=(UBool)(cnv==0);
+  if(borrowDefault) {
+    cnv=u_getDefaultConverter(&errorCode);
+    if(U_FAILURE(errorCode)) {
+      return 0;
+    }
+  } else {
+    ucnv_resetFromUnicode(cnv);
+  }
+  const int32_t converted=
+    doExtract(0, length(), dest, destCapacity, cnv, errorCode);
+  if(borrowDefault) {
+    // hand the shared default converter back
+    u_releaseDefaultConverter(cnv);
+  }
+  return converted;
+}
+// Converts length UChars starting at index start into dest using cnv.
+//   start, length  substring to convert (callers pin these beforehand)
+//   dest           output buffer; not written when destCapacity is 0
+//   destCapacity   buffer size in bytes; -1 is the "magic" value meaning the
+//                  limit is U_MAX_PTR(dest)
+//   cnv            converter to use
+//   errorCode      in/out status; on a failure at entry the buffer is
+//                  emptied (if it has room) and 0 is returned
+// If the buffer overflows, the remaining input is converted into a scratch
+// buffer so that the total output length is still counted (preflighting).
+// Returns the result of u_terminateChars() for that total length.
+int32_t
+UnicodeString::doExtract(int32_t start, int32_t length,
+                         char *dest, int32_t destCapacity,
+                         UConverter *cnv,
+                         UErrorCode &errorCode) const
+{
+  if(U_FAILURE(errorCode)) {
+    if(destCapacity!=0) {
+      *dest=0;
+    }
+    return 0;
+  }
+  const UChar *src=getArrayStart()+start, *srcLimit=src+length;
+  char *originalDest=dest;
+  const char *destLimit;
+  if(destCapacity==0) {
+    // no room at all: make sure the converter cannot write anywhere
+    destLimit=dest=0;
+  } else if(destCapacity==-1) {
+    // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
+    destLimit=(char*)U_MAX_PTR(dest);
+    // for NUL-termination, translate into highest int32_t
+    destCapacity=0x7fffffff;
+  } else {
+    destLimit=dest+destCapacity;
+  }
+  // Remember where the converter actually starts writing. With
+  // destCapacity==0 this is 0 rather than the caller's pointer, so the
+  // first-pass length must be measured from here; measuring from
+  // originalDest would subtract unrelated pointers and corrupt the
+  // preflight length for a non-NULL, zero-capacity buffer.
+  char *convStart=dest;
+  // perform the conversion
+  ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
+  length=(int32_t)(dest-convStart);
+  // if an overflow occurs, then get the preflighting length
+  if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+    char buffer[1024];
+    destLimit=buffer+sizeof(buffer);
+    do {
+      // convert the rest chunk by chunk; only the byte count is kept
+      dest=buffer;
+      errorCode=U_ZERO_ERROR;
+      ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
+      length+=(int32_t)(dest-buffer);
+    } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
+  }
+  return u_terminateChars(originalDest, destCapacity, length, &errorCode);
+}
+// Fills this string by converting dataLength bytes of codepageData.
+//   codepageData  input bytes; 0 means nothing to do
+//   dataLength    byte count; -1 takes uprv_strlen(codepageData), 0 or any
+//                 value below -1 means nothing to do
+//   codepage      0 selects the default converter (or setToUTF8() when the
+//                 default name is UTF-8), "" selects the invariant-character
+//                 conversion, anything else is opened with ucnv_open()
+// Any failure leaves the string bogus.
+void
+UnicodeString::doCodepageCreate(const char *codepageData,
+                                int32_t dataLength,
+                                const char *codepage)
+{
+  // if there's nothing to convert, do nothing
+  if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
+    return;
+  }
+  if(dataLength == -1) {
+    dataLength = (int32_t)uprv_strlen(codepageData);
+  }
+  // empty codepage name: "invariant characters" conversion, no converter
+  if(codepage != 0 && *codepage == 0) {
+    if(!cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
+      setToBogus();
+      return;
+    }
+    u_charsToUChars(codepageData, getArrayStart(), dataLength);
+    setLength(dataLength);
+    return;
+  }
+  UErrorCode status = U_ZERO_ERROR;
+  const UBool borrowDefault = (UBool)(codepage == 0);
+  UConverter *converter;
+  if(borrowDefault) {
+    const char *defaultName = ucnv_getDefaultName();
+    if(UCNV_FAST_IS_UTF8(defaultName)) {
+      setToUTF8(StringPiece(codepageData, dataLength));
+      return;
+    }
+    // use the cached default converter
+    converter = u_getDefaultConverter(&status);
+  } else {
+    converter = ucnv_open(codepage, &status);
+  }
+  // without a converter there is nothing to release either
+  if(U_FAILURE(status)) {
+    setToBogus();
+    return;
+  }
+  doCodepageCreate(codepageData, dataLength, converter, status);
+  if(U_FAILURE(status)) {
+    setToBogus();
+  }
+  // give the converter back the same way it was obtained
+  if(borrowDefault) {
+    u_releaseDefaultConverter(converter);
+  } else {
+    ucnv_close(converter);
+  }
+}
+// Converts dataLength bytes of codepageData with converter and stores the
+// result in this string, growing the buffer until everything fits.
+//   status  in/out error code; nothing is done on a failure at entry.
+//           U_BUFFER_OVERFLOW_ERROR from the converter is handled here by
+//           enlarging the buffer and continuing; other codes are left in
+//           status for the caller.
+// The string is set to bogus if the buffer cannot be obtained.
+void
+UnicodeString::doCodepageCreate(const char *codepageData,
+                                int32_t dataLength,
+                                UConverter *converter,
+                                UErrorCode &status)
+{
+  if(U_FAILURE(status)) {
+    return;
+  }
+  const char *srcPos = codepageData;
+  const char *const srcEnd = codepageData + dataLength;
+  // first guess for the capacity
+  int32_t wantedCapacity;
+  if(dataLength > US_STACKBUF_SIZE) {
+    // 1.25 UChar's per source byte should cover most cases
+    wantedCapacity = dataLength + (dataLength >> 2);
+  } else {
+    // try to use the stack buffer
+    wantedCapacity = US_STACKBUF_SIZE;
+  }
+  // the current contents do not matter on the first pass
+  UBool keepContents = FALSE;
+  while(cloneArrayIfNeeded(wantedCapacity, wantedCapacity, keepContents)) {
+    // convert as much as fits, appending after what is already there
+    UChar *base = getArrayStart();
+    UChar *out = base + length();
+    ucnv_toUnicode(converter, &out, base + getCapacity(),
+                   &srcPos, srcEnd, 0, TRUE, &status);
+    setLength((int32_t)(out - base));
+    if(status != U_BUFFER_OVERFLOW_ERROR) {
+      // finished, successfully or with an error the caller must handle
+      return;
+    }
+    // Out of room: clear the overflow, keep what was converted so far and
+    // ask for 2 UChar's per remaining source byte.
+    status = U_ZERO_ERROR;
+    keepContents = TRUE;
+    wantedCapacity = (int32_t)(length() + 2 * (srcEnd - srcPos));
+  }
+  // the buffer could not be obtained
+  setToBogus();
+}
+U_NAMESPACE_END
+#endif

The Tor Browser / file comparison

comparison: intl/icu/source/common/unistr_cnv.cpp

intl/icu/source/common/unistr_cnv.cpp