intl/icu/source/common/usc_impl.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 1999-2009, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 *
michael@0 7 * File USC_IMPL.C
michael@0 8 *
michael@0 9 * Modification History:
michael@0 10 *
michael@0 11 * Date Name Description
michael@0 12 * 07/08/2002 Eric Mader Creation.
michael@0 13 ******************************************************************************
michael@0 14 */
michael@0 15
michael@0 16 #include "unicode/uscript.h"
michael@0 17 #include "usc_impl.h"
michael@0 18 #include "cmemory.h"
michael@0 19
/* Number of elements in a true array (must not be used on a pointer). */
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

/* Capacity of the circular stack of open paired characters. */
#define PAREN_STACK_DEPTH 32

/*
 * Stack-pointer arithmetic. The stack is circular, so every computed
 * position is reduced modulo PAREN_STACK_DEPTH.
 */
#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
/* Increment a counter, saturating at PAREN_STACK_DEPTH. */
#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH)
#define INC(sp,count) (MOD((sp) + (count)))
#define INC1(sp) (INC(sp, 1))
/* PAREN_STACK_DEPTH is added before subtracting so the operand of % stays non-negative. */
#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
#define DEC1(sp) (DEC(sp, 1))

/* Queries and accessors on a UScriptRun's paren stack. */
#define STACK_IS_EMPTY(scriptRun) ((scriptRun)->pushCount <= 0)
#define STACK_IS_NOT_EMPTY(scriptRun) (! STACK_IS_EMPTY(scriptRun))
#define TOP(scriptRun) ((scriptRun)->parenStack[(scriptRun)->parenSP])
/* Forget any pending fixups; used when a new run starts. */
#define SYNC_FIXUP(scriptRun) ((scriptRun)->fixupCount = 0)
michael@0 34
michael@0 35 struct ParenStackEntry
michael@0 36 {
michael@0 37 int32_t pairIndex;
michael@0 38 UScriptCode scriptCode;
michael@0 39 };
michael@0 40
michael@0 41 struct UScriptRun
michael@0 42 {
michael@0 43 int32_t textLength;
michael@0 44 const UChar *textArray;
michael@0 45
michael@0 46 int32_t scriptStart;
michael@0 47 int32_t scriptLimit;
michael@0 48 UScriptCode scriptCode;
michael@0 49
michael@0 50 struct ParenStackEntry parenStack[PAREN_STACK_DEPTH];
michael@0 51 int32_t parenSP;
michael@0 52 int32_t pushCount;
michael@0 53 int32_t fixupCount;
michael@0 54 };
michael@0 55
michael@0 56 static int8_t highBit(int32_t value);
michael@0 57
michael@0 58 static const UChar32 pairedChars[] = {
michael@0 59 0x0028, 0x0029, /* ascii paired punctuation */
michael@0 60 0x003c, 0x003e,
michael@0 61 0x005b, 0x005d,
michael@0 62 0x007b, 0x007d,
michael@0 63 0x00ab, 0x00bb, /* guillemets */
michael@0 64 0x2018, 0x2019, /* general punctuation */
michael@0 65 0x201c, 0x201d,
michael@0 66 0x2039, 0x203a,
michael@0 67 0x3008, 0x3009, /* chinese paired punctuation */
michael@0 68 0x300a, 0x300b,
michael@0 69 0x300c, 0x300d,
michael@0 70 0x300e, 0x300f,
michael@0 71 0x3010, 0x3011,
michael@0 72 0x3014, 0x3015,
michael@0 73 0x3016, 0x3017,
michael@0 74 0x3018, 0x3019,
michael@0 75 0x301a, 0x301b
michael@0 76 };
michael@0 77
michael@0 78 static void push(UScriptRun *scriptRun, int32_t pairIndex, UScriptCode scriptCode)
michael@0 79 {
michael@0 80 scriptRun->pushCount = LIMIT_INC(scriptRun->pushCount);
michael@0 81 scriptRun->fixupCount = LIMIT_INC(scriptRun->fixupCount);
michael@0 82
michael@0 83 scriptRun->parenSP = INC1(scriptRun->parenSP);
michael@0 84 scriptRun->parenStack[scriptRun->parenSP].pairIndex = pairIndex;
michael@0 85 scriptRun->parenStack[scriptRun->parenSP].scriptCode = scriptCode;
michael@0 86 }
michael@0 87
michael@0 88 static void pop(UScriptRun *scriptRun)
michael@0 89 {
michael@0 90 if (STACK_IS_EMPTY(scriptRun)) {
michael@0 91 return;
michael@0 92 }
michael@0 93
michael@0 94 if (scriptRun->fixupCount > 0) {
michael@0 95 scriptRun->fixupCount -= 1;
michael@0 96 }
michael@0 97
michael@0 98 scriptRun->pushCount -= 1;
michael@0 99 scriptRun->parenSP = DEC1(scriptRun->parenSP);
michael@0 100
michael@0 101 /* If the stack is now empty, reset the stack
michael@0 102 pointers to their initial values.
michael@0 103 */
michael@0 104 if (STACK_IS_EMPTY(scriptRun)) {
michael@0 105 scriptRun->parenSP = -1;
michael@0 106 }
michael@0 107 }
michael@0 108
michael@0 109 static void fixup(UScriptRun *scriptRun, UScriptCode scriptCode)
michael@0 110 {
michael@0 111 int32_t fixupSP = DEC(scriptRun->parenSP, scriptRun->fixupCount);
michael@0 112
michael@0 113 while (scriptRun->fixupCount-- > 0) {
michael@0 114 fixupSP = INC1(fixupSP);
michael@0 115 scriptRun->parenStack[fixupSP].scriptCode = scriptCode;
michael@0 116 }
michael@0 117 }
michael@0 118
/*
 * Return the zero-based position of the most significant set bit of
 * value, or -32 if value is zero or negative.
 */
static int8_t
highBit(int32_t value)
{
    int8_t position = 0;
    int32_t width;

    if (value <= 0) {
        return -32;
    }

    /* narrow the search by halves: 16, 8, 4, 2, then 1 bit(s) */
    for (width = 16; width > 0; width >>= 1) {
        if ((value >> width) != 0) {
            value >>= width;
            position = (int8_t)(position + width);
        }
    }

    return position;
}
michael@0 155
michael@0 156 static int32_t
michael@0 157 getPairIndex(UChar32 ch)
michael@0 158 {
michael@0 159 int32_t pairedCharCount = ARRAY_SIZE(pairedChars);
michael@0 160 int32_t pairedCharPower = 1 << highBit(pairedCharCount);
michael@0 161 int32_t pairedCharExtra = pairedCharCount - pairedCharPower;
michael@0 162
michael@0 163 int32_t probe = pairedCharPower;
michael@0 164 int32_t pairIndex = 0;
michael@0 165
michael@0 166 if (ch >= pairedChars[pairedCharExtra]) {
michael@0 167 pairIndex = pairedCharExtra;
michael@0 168 }
michael@0 169
michael@0 170 while (probe > (1 << 0)) {
michael@0 171 probe >>= 1;
michael@0 172
michael@0 173 if (ch >= pairedChars[pairIndex + probe]) {
michael@0 174 pairIndex += probe;
michael@0 175 }
michael@0 176 }
michael@0 177
michael@0 178 if (pairedChars[pairIndex] != ch) {
michael@0 179 pairIndex = -1;
michael@0 180 }
michael@0 181
michael@0 182 return pairIndex;
michael@0 183 }
michael@0 184
michael@0 185 static UBool
michael@0 186 sameScript(UScriptCode scriptOne, UScriptCode scriptTwo)
michael@0 187 {
michael@0 188 return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo;
michael@0 189 }
michael@0 190
michael@0 191 U_CAPI UScriptRun * U_EXPORT2
michael@0 192 uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode)
michael@0 193 {
michael@0 194 UScriptRun *result = NULL;
michael@0 195
michael@0 196 if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
michael@0 197 return NULL;
michael@0 198 }
michael@0 199
michael@0 200 result = uprv_malloc(sizeof (UScriptRun));
michael@0 201
michael@0 202 if (result == NULL) {
michael@0 203 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
michael@0 204 return NULL;
michael@0 205 }
michael@0 206
michael@0 207 uscript_setRunText(result, src, length, pErrorCode);
michael@0 208
michael@0 209 /* Release the UScriptRun if uscript_setRunText() returns an error */
michael@0 210 if (U_FAILURE(*pErrorCode)) {
michael@0 211 uprv_free(result);
michael@0 212 result = NULL;
michael@0 213 }
michael@0 214
michael@0 215 return result;
michael@0 216 }
michael@0 217
michael@0 218 U_CAPI void U_EXPORT2
michael@0 219 uscript_closeRun(UScriptRun *scriptRun)
michael@0 220 {
michael@0 221 if (scriptRun != NULL) {
michael@0 222 uprv_free(scriptRun);
michael@0 223 }
michael@0 224 }
michael@0 225
michael@0 226 U_CAPI void U_EXPORT2
michael@0 227 uscript_resetRun(UScriptRun *scriptRun)
michael@0 228 {
michael@0 229 if (scriptRun != NULL) {
michael@0 230 scriptRun->scriptStart = 0;
michael@0 231 scriptRun->scriptLimit = 0;
michael@0 232 scriptRun->scriptCode = USCRIPT_INVALID_CODE;
michael@0 233 scriptRun->parenSP = -1;
michael@0 234 scriptRun->pushCount = 0;
michael@0 235 scriptRun->fixupCount = 0;
michael@0 236 }
michael@0 237 }
michael@0 238
michael@0 239 U_CAPI void U_EXPORT2
michael@0 240 uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode)
michael@0 241 {
michael@0 242 if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
michael@0 243 return;
michael@0 244 }
michael@0 245
michael@0 246 if (scriptRun == NULL || length < 0 || ((src == NULL) != (length == 0))) {
michael@0 247 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 248 return;
michael@0 249 }
michael@0 250
michael@0 251 scriptRun->textArray = src;
michael@0 252 scriptRun->textLength = length;
michael@0 253
michael@0 254 uscript_resetRun(scriptRun);
michael@0 255 }
michael@0 256
michael@0 257 U_CAPI UBool U_EXPORT2
michael@0 258 uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript)
michael@0 259 {
michael@0 260 UErrorCode error = U_ZERO_ERROR;
michael@0 261
michael@0 262 /* if we've fallen off the end of the text, we're done */
michael@0 263 if (scriptRun == NULL || scriptRun->scriptLimit >= scriptRun->textLength) {
michael@0 264 return FALSE;
michael@0 265 }
michael@0 266
michael@0 267 SYNC_FIXUP(scriptRun);
michael@0 268 scriptRun->scriptCode = USCRIPT_COMMON;
michael@0 269
michael@0 270 for (scriptRun->scriptStart = scriptRun->scriptLimit; scriptRun->scriptLimit < scriptRun->textLength; scriptRun->scriptLimit += 1) {
michael@0 271 UChar high = scriptRun->textArray[scriptRun->scriptLimit];
michael@0 272 UChar32 ch = high;
michael@0 273 UScriptCode sc;
michael@0 274 int32_t pairIndex;
michael@0 275
michael@0 276 /*
michael@0 277 * if the character is a high surrogate and it's not the last one
michael@0 278 * in the text, see if it's followed by a low surrogate
michael@0 279 */
michael@0 280 if (high >= 0xD800 && high <= 0xDBFF && scriptRun->scriptLimit < scriptRun->textLength - 1) {
michael@0 281 UChar low = scriptRun->textArray[scriptRun->scriptLimit + 1];
michael@0 282
michael@0 283 /*
michael@0 284 * if it is followed by a low surrogate,
michael@0 285 * consume it and form the full character
michael@0 286 */
michael@0 287 if (low >= 0xDC00 && low <= 0xDFFF) {
michael@0 288 ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000;
michael@0 289 scriptRun->scriptLimit += 1;
michael@0 290 }
michael@0 291 }
michael@0 292
michael@0 293 sc = uscript_getScript(ch, &error);
michael@0 294 pairIndex = getPairIndex(ch);
michael@0 295
michael@0 296 /*
michael@0 297 * Paired character handling:
michael@0 298 *
michael@0 299 * if it's an open character, push it onto the stack.
michael@0 300 * if it's a close character, find the matching open on the
michael@0 301 * stack, and use that script code. Any non-matching open
michael@0 302 * characters above it on the stack will be poped.
michael@0 303 */
michael@0 304 if (pairIndex >= 0) {
michael@0 305 if ((pairIndex & 1) == 0) {
michael@0 306 push(scriptRun, pairIndex, scriptRun->scriptCode);
michael@0 307 } else {
michael@0 308 int32_t pi = pairIndex & ~1;
michael@0 309
michael@0 310 while (STACK_IS_NOT_EMPTY(scriptRun) && TOP(scriptRun).pairIndex != pi) {
michael@0 311 pop(scriptRun);
michael@0 312 }
michael@0 313
michael@0 314 if (STACK_IS_NOT_EMPTY(scriptRun)) {
michael@0 315 sc = TOP(scriptRun).scriptCode;
michael@0 316 }
michael@0 317 }
michael@0 318 }
michael@0 319
michael@0 320 if (sameScript(scriptRun->scriptCode, sc)) {
michael@0 321 if (scriptRun->scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
michael@0 322 scriptRun->scriptCode = sc;
michael@0 323
michael@0 324 fixup(scriptRun, scriptRun->scriptCode);
michael@0 325 }
michael@0 326
michael@0 327 /*
michael@0 328 * if this character is a close paired character,
michael@0 329 * pop the matching open character from the stack
michael@0 330 */
michael@0 331 if (pairIndex >= 0 && (pairIndex & 1) != 0) {
michael@0 332 pop(scriptRun);
michael@0 333 }
michael@0 334 } else {
michael@0 335 /*
michael@0 336 * if the run broke on a surrogate pair,
michael@0 337 * end it before the high surrogate
michael@0 338 */
michael@0 339 if (ch >= 0x10000) {
michael@0 340 scriptRun->scriptLimit -= 1;
michael@0 341 }
michael@0 342
michael@0 343 break;
michael@0 344 }
michael@0 345 }
michael@0 346
michael@0 347
michael@0 348 if (pRunStart != NULL) {
michael@0 349 *pRunStart = scriptRun->scriptStart;
michael@0 350 }
michael@0 351
michael@0 352 if (pRunLimit != NULL) {
michael@0 353 *pRunLimit = scriptRun->scriptLimit;
michael@0 354 }
michael@0 355
michael@0 356 if (pRunScript != NULL) {
michael@0 357 *pRunScript = scriptRun->scriptCode;
michael@0 358 }
michael@0 359
michael@0 360 return TRUE;
michael@0 361 }

mercurial