1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/usc_impl.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,361 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 1999-2009, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* 1.10 +* File USC_IMPL.C 1.11 +* 1.12 +* Modification History: 1.13 +* 1.14 +* Date Name Description 1.15 +* 07/08/2002 Eric Mader Creation. 1.16 +****************************************************************************** 1.17 +*/ 1.18 + 1.19 +#include "unicode/uscript.h" 1.20 +#include "usc_impl.h" 1.21 +#include "cmemory.h" 1.22 + 1.23 +#define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) 1.24 + 1.25 +#define PAREN_STACK_DEPTH 32 1.26 + 1.27 +#define MOD(sp) ((sp) % PAREN_STACK_DEPTH) 1.28 +#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH) 1.29 +#define INC(sp,count) (MOD((sp) + (count))) 1.30 +#define INC1(sp) (INC(sp, 1)) 1.31 +#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count))) 1.32 +#define DEC1(sp) (DEC(sp, 1)) 1.33 +#define STACK_IS_EMPTY(scriptRun) ((scriptRun)->pushCount <= 0) 1.34 +#define STACK_IS_NOT_EMPTY(scriptRun) (! STACK_IS_EMPTY(scriptRun)) 1.35 +#define TOP(scriptRun) ((scriptRun)->parenStack[(scriptRun)->parenSP]) 1.36 +#define SYNC_FIXUP(scriptRun) ((scriptRun)->fixupCount = 0) 1.37 + 1.38 +struct ParenStackEntry 1.39 +{ 1.40 + int32_t pairIndex; 1.41 + UScriptCode scriptCode; 1.42 +}; 1.43 + 1.44 +struct UScriptRun 1.45 +{ 1.46 + int32_t textLength; 1.47 + const UChar *textArray; 1.48 + 1.49 + int32_t scriptStart; 1.50 + int32_t scriptLimit; 1.51 + UScriptCode scriptCode; 1.52 + 1.53 + struct ParenStackEntry parenStack[PAREN_STACK_DEPTH]; 1.54 + int32_t parenSP; 1.55 + int32_t pushCount; 1.56 + int32_t fixupCount; 1.57 +}; 1.58 + 1.59 +static int8_t highBit(int32_t value); 1.60 + 1.61 +static const UChar32 pairedChars[] = { 1.62 + 0x0028, 0x0029, /* ascii paired punctuation */ 1.63 + 0x003c, 0x003e, 1.64 + 0x005b, 0x005d, 1.65 + 0x007b, 0x007d, 1.66 + 0x00ab, 0x00bb, /* guillemets */ 1.67 + 0x2018, 0x2019, /* general punctuation */ 1.68 + 0x201c, 0x201d, 1.69 + 0x2039, 0x203a, 1.70 + 0x3008, 0x3009, /* chinese paired punctuation */ 1.71 + 0x300a, 0x300b, 1.72 + 0x300c, 0x300d, 1.73 + 0x300e, 0x300f, 1.74 + 0x3010, 0x3011, 1.75 + 0x3014, 0x3015, 1.76 + 0x3016, 0x3017, 1.77 + 0x3018, 0x3019, 1.78 + 0x301a, 0x301b 1.79 +}; 1.80 + 1.81 +static void push(UScriptRun *scriptRun, int32_t pairIndex, UScriptCode scriptCode) 1.82 +{ 1.83 + scriptRun->pushCount = LIMIT_INC(scriptRun->pushCount); 1.84 + scriptRun->fixupCount = LIMIT_INC(scriptRun->fixupCount); 1.85 + 1.86 + scriptRun->parenSP = INC1(scriptRun->parenSP); 1.87 + scriptRun->parenStack[scriptRun->parenSP].pairIndex = pairIndex; 1.88 + scriptRun->parenStack[scriptRun->parenSP].scriptCode = scriptCode; 1.89 +} 1.90 + 1.91 +static void pop(UScriptRun *scriptRun) 1.92 +{ 1.93 + if (STACK_IS_EMPTY(scriptRun)) { 1.94 + return; 1.95 + } 1.96 + 1.97 + if (scriptRun->fixupCount > 0) { 1.98 + scriptRun->fixupCount -= 1; 1.99 + } 1.100 + 1.101 + scriptRun->pushCount -= 1; 1.102 + scriptRun->parenSP = DEC1(scriptRun->parenSP); 1.103 + 1.104 + /* If the stack is now empty, reset the stack 1.105 + pointers to their initial values. 1.106 + */ 1.107 + if (STACK_IS_EMPTY(scriptRun)) { 1.108 + scriptRun->parenSP = -1; 1.109 + } 1.110 +} 1.111 + 1.112 +static void fixup(UScriptRun *scriptRun, UScriptCode scriptCode) 1.113 +{ 1.114 + int32_t fixupSP = DEC(scriptRun->parenSP, scriptRun->fixupCount); 1.115 + 1.116 + while (scriptRun->fixupCount-- > 0) { 1.117 + fixupSP = INC1(fixupSP); 1.118 + scriptRun->parenStack[fixupSP].scriptCode = scriptCode; 1.119 + } 1.120 +} 1.121 + 1.122 +static int8_t 1.123 +highBit(int32_t value) 1.124 +{ 1.125 + int8_t bit = 0; 1.126 + 1.127 + if (value <= 0) { 1.128 + return -32; 1.129 + } 1.130 + 1.131 + if (value >= 1 << 16) { 1.132 + value >>= 16; 1.133 + bit += 16; 1.134 + } 1.135 + 1.136 + if (value >= 1 << 8) { 1.137 + value >>= 8; 1.138 + bit += 8; 1.139 + } 1.140 + 1.141 + if (value >= 1 << 4) { 1.142 + value >>= 4; 1.143 + bit += 4; 1.144 + } 1.145 + 1.146 + if (value >= 1 << 2) { 1.147 + value >>= 2; 1.148 + bit += 2; 1.149 + } 1.150 + 1.151 + if (value >= 1 << 1) { 1.152 + value >>= 1; 1.153 + bit += 1; 1.154 + } 1.155 + 1.156 + return bit; 1.157 +} 1.158 + 1.159 +static int32_t 1.160 +getPairIndex(UChar32 ch) 1.161 +{ 1.162 + int32_t pairedCharCount = ARRAY_SIZE(pairedChars); 1.163 + int32_t pairedCharPower = 1 << highBit(pairedCharCount); 1.164 + int32_t pairedCharExtra = pairedCharCount - pairedCharPower; 1.165 + 1.166 + int32_t probe = pairedCharPower; 1.167 + int32_t pairIndex = 0; 1.168 + 1.169 + if (ch >= pairedChars[pairedCharExtra]) { 1.170 + pairIndex = pairedCharExtra; 1.171 + } 1.172 + 1.173 + while (probe > (1 << 0)) { 1.174 + probe >>= 1; 1.175 + 1.176 + if (ch >= pairedChars[pairIndex + probe]) { 1.177 + pairIndex += probe; 1.178 + } 1.179 + } 1.180 + 1.181 + if (pairedChars[pairIndex] != ch) { 1.182 + pairIndex = -1; 1.183 + } 1.184 + 1.185 + return pairIndex; 1.186 +} 1.187 + 1.188 +static UBool 1.189 +sameScript(UScriptCode scriptOne, UScriptCode scriptTwo) 1.190 +{ 1.191 + return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo; 1.192 +} 1.193 + 1.194 +U_CAPI UScriptRun * U_EXPORT2 1.195 +uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode) 1.196 +{ 1.197 + UScriptRun *result = NULL; 1.198 + 1.199 + if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) { 1.200 + return NULL; 1.201 + } 1.202 + 1.203 + result = uprv_malloc(sizeof (UScriptRun)); 1.204 + 1.205 + if (result == NULL) { 1.206 + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1.207 + return NULL; 1.208 + } 1.209 + 1.210 + uscript_setRunText(result, src, length, pErrorCode); 1.211 + 1.212 + /* Release the UScriptRun if uscript_setRunText() returns an error */ 1.213 + if (U_FAILURE(*pErrorCode)) { 1.214 + uprv_free(result); 1.215 + result = NULL; 1.216 + } 1.217 + 1.218 + return result; 1.219 +} 1.220 + 1.221 +U_CAPI void U_EXPORT2 1.222 +uscript_closeRun(UScriptRun *scriptRun) 1.223 +{ 1.224 + if (scriptRun != NULL) { 1.225 + uprv_free(scriptRun); 1.226 + } 1.227 +} 1.228 + 1.229 +U_CAPI void U_EXPORT2 1.230 +uscript_resetRun(UScriptRun *scriptRun) 1.231 +{ 1.232 + if (scriptRun != NULL) { 1.233 + scriptRun->scriptStart = 0; 1.234 + scriptRun->scriptLimit = 0; 1.235 + scriptRun->scriptCode = USCRIPT_INVALID_CODE; 1.236 + scriptRun->parenSP = -1; 1.237 + scriptRun->pushCount = 0; 1.238 + scriptRun->fixupCount = 0; 1.239 + } 1.240 +} 1.241 + 1.242 +U_CAPI void U_EXPORT2 1.243 +uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode) 1.244 +{ 1.245 + if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) { 1.246 + return; 1.247 + } 1.248 + 1.249 + if (scriptRun == NULL || length < 0 || ((src == NULL) != (length == 0))) { 1.250 + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 1.251 + return; 1.252 + } 1.253 + 1.254 + scriptRun->textArray = src; 1.255 + scriptRun->textLength = length; 1.256 + 1.257 + uscript_resetRun(scriptRun); 1.258 +} 1.259 + 1.260 +U_CAPI UBool U_EXPORT2 1.261 +uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript) 1.262 +{ 1.263 + UErrorCode error = U_ZERO_ERROR; 1.264 + 1.265 + /* if we've fallen off the end of the text, we're done */ 1.266 + if (scriptRun == NULL || scriptRun->scriptLimit >= scriptRun->textLength) { 1.267 + return FALSE; 1.268 + } 1.269 + 1.270 + SYNC_FIXUP(scriptRun); 1.271 + scriptRun->scriptCode = USCRIPT_COMMON; 1.272 + 1.273 + for (scriptRun->scriptStart = scriptRun->scriptLimit; scriptRun->scriptLimit < scriptRun->textLength; scriptRun->scriptLimit += 1) { 1.274 + UChar high = scriptRun->textArray[scriptRun->scriptLimit]; 1.275 + UChar32 ch = high; 1.276 + UScriptCode sc; 1.277 + int32_t pairIndex; 1.278 + 1.279 + /* 1.280 + * if the character is a high surrogate and it's not the last one 1.281 + * in the text, see if it's followed by a low surrogate 1.282 + */ 1.283 + if (high >= 0xD800 && high <= 0xDBFF && scriptRun->scriptLimit < scriptRun->textLength - 1) { 1.284 + UChar low = scriptRun->textArray[scriptRun->scriptLimit + 1]; 1.285 + 1.286 + /* 1.287 + * if it is followed by a low surrogate, 1.288 + * consume it and form the full character 1.289 + */ 1.290 + if (low >= 0xDC00 && low <= 0xDFFF) { 1.291 + ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000; 1.292 + scriptRun->scriptLimit += 1; 1.293 + } 1.294 + } 1.295 + 1.296 + sc = uscript_getScript(ch, &error); 1.297 + pairIndex = getPairIndex(ch); 1.298 + 1.299 + /* 1.300 + * Paired character handling: 1.301 + * 1.302 + * if it's an open character, push it onto the stack. 1.303 + * if it's a close character, find the matching open on the 1.304 + * stack, and use that script code. Any non-matching open 1.305 + * characters above it on the stack will be poped. 1.306 + */ 1.307 + if (pairIndex >= 0) { 1.308 + if ((pairIndex & 1) == 0) { 1.309 + push(scriptRun, pairIndex, scriptRun->scriptCode); 1.310 + } else { 1.311 + int32_t pi = pairIndex & ~1; 1.312 + 1.313 + while (STACK_IS_NOT_EMPTY(scriptRun) && TOP(scriptRun).pairIndex != pi) { 1.314 + pop(scriptRun); 1.315 + } 1.316 + 1.317 + if (STACK_IS_NOT_EMPTY(scriptRun)) { 1.318 + sc = TOP(scriptRun).scriptCode; 1.319 + } 1.320 + } 1.321 + } 1.322 + 1.323 + if (sameScript(scriptRun->scriptCode, sc)) { 1.324 + if (scriptRun->scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) { 1.325 + scriptRun->scriptCode = sc; 1.326 + 1.327 + fixup(scriptRun, scriptRun->scriptCode); 1.328 + } 1.329 + 1.330 + /* 1.331 + * if this character is a close paired character, 1.332 + * pop the matching open character from the stack 1.333 + */ 1.334 + if (pairIndex >= 0 && (pairIndex & 1) != 0) { 1.335 + pop(scriptRun); 1.336 + } 1.337 + } else { 1.338 + /* 1.339 + * if the run broke on a surrogate pair, 1.340 + * end it before the high surrogate 1.341 + */ 1.342 + if (ch >= 0x10000) { 1.343 + scriptRun->scriptLimit -= 1; 1.344 + } 1.345 + 1.346 + break; 1.347 + } 1.348 + } 1.349 + 1.350 + 1.351 + if (pRunStart != NULL) { 1.352 + *pRunStart = scriptRun->scriptStart; 1.353 + } 1.354 + 1.355 + if (pRunLimit != NULL) { 1.356 + *pRunLimit = scriptRun->scriptLimit; 1.357 + } 1.358 + 1.359 + if (pRunScript != NULL) { 1.360 + *pRunScript = scriptRun->scriptCode; 1.361 + } 1.362 + 1.363 + return TRUE; 1.364 +}