|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 2002-2013, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * file name: uprops.cpp |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2002feb24 |
|
14 * created by: Markus W. Scherer |
|
15 * |
|
16 * Implementations for mostly non-core Unicode character properties |
|
17 * stored in uprops.icu. |
|
18 * |
|
19 * With the APIs implemented here, almost all properties files and |
|
20 * their associated implementation files are used from this file, |
|
21 * including those for normalization and case mappings. |
|
22 */ |
|
23 |
|
24 #include "unicode/utypes.h" |
|
25 #include "unicode/uchar.h" |
|
26 #include "unicode/unorm2.h" |
|
27 #include "unicode/uscript.h" |
|
28 #include "unicode/ustring.h" |
|
29 #include "cstring.h" |
|
30 #include "normalizer2impl.h" |
|
31 #include "ucln_cmn.h" |
|
32 #include "umutex.h" |
|
33 #include "ubidi_props.h" |
|
34 #include "uprops.h" |
|
35 #include "ucase.h" |
|
36 #include "ustr_imp.h" |
|
37 |
|
38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) |
|
39 |
|
40 U_NAMESPACE_USE |
|
41 |
|
42 #define GET_BIDI_PROPS() ubidi_getSingleton() |
|
43 |
|
44 /* general properties API functions ----------------------------------------- */ |
|
45 |
|
46 struct BinaryProperty; |
|
47 |
|
48 typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which); |
|
49 |
|
50 struct BinaryProperty { |
|
51 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 |
|
52 uint32_t mask; |
|
53 BinaryPropertyContains *contains; |
|
54 }; |
|
55 |
|
56 static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) { |
|
57 /* systematic, directly stored properties */ |
|
58 return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0; |
|
59 } |
|
60 |
|
61 static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { |
|
62 return ucase_hasBinaryProperty(c, which); |
|
63 } |
|
64 |
|
65 static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
66 return ubidi_isBidiControl(GET_BIDI_PROPS(), c); |
|
67 } |
|
68 |
|
69 static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
70 return ubidi_isMirrored(GET_BIDI_PROPS(), c); |
|
71 } |
|
72 |
|
73 static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
74 return ubidi_isJoinControl(GET_BIDI_PROPS(), c); |
|
75 } |
|
76 |
|
77 #if UCONFIG_NO_NORMALIZATION |
|
78 static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) { |
|
79 return FALSE; |
|
80 } |
|
81 #else |
|
82 static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
83 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. |
|
84 UErrorCode errorCode=U_ZERO_ERROR; |
|
85 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); |
|
86 return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c)); |
|
87 } |
|
88 #endif |
|
89 |
|
90 // UCHAR_NF*_INERT properties |
|
91 #if UCONFIG_NO_NORMALIZATION |
|
92 static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) { |
|
93 return FALSE; |
|
94 } |
|
95 #else |
|
96 static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { |
|
97 UErrorCode errorCode=U_ZERO_ERROR; |
|
98 const Normalizer2 *norm2=Normalizer2Factory::getInstance( |
|
99 (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); |
|
100 return U_SUCCESS(errorCode) && norm2->isInert(c); |
|
101 } |
|
102 #endif |
|
103 |
|
104 #if UCONFIG_NO_NORMALIZATION |
|
105 static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { |
|
106 return FALSE; |
|
107 } |
|
108 #else |
|
109 static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
110 UnicodeString nfd; |
|
111 UErrorCode errorCode=U_ZERO_ERROR; |
|
112 const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode); |
|
113 if(U_FAILURE(errorCode)) { |
|
114 return FALSE; |
|
115 } |
|
116 if(nfcNorm2->getDecomposition(c, nfd)) { |
|
117 /* c has a decomposition */ |
|
118 if(nfd.length()==1) { |
|
119 c=nfd[0]; /* single BMP code point */ |
|
120 } else if(nfd.length()<=U16_MAX_LENGTH && |
|
121 nfd.length()==U16_LENGTH(c=nfd.char32At(0)) |
|
122 ) { |
|
123 /* single supplementary code point */ |
|
124 } else { |
|
125 c=U_SENTINEL; |
|
126 } |
|
127 } else if(c<0) { |
|
128 return FALSE; /* protect against bad input */ |
|
129 } |
|
130 if(c>=0) { |
|
131 /* single code point */ |
|
132 const UCaseProps *csp=ucase_getSingleton(); |
|
133 const UChar *resultString; |
|
134 return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0); |
|
135 } else { |
|
136 /* guess some large but stack-friendly capacity */ |
|
137 UChar dest[2*UCASE_MAX_STRING_LENGTH]; |
|
138 int32_t destLength; |
|
139 destLength=u_strFoldCase(dest, LENGTHOF(dest), |
|
140 nfd.getBuffer(), nfd.length(), |
|
141 U_FOLD_CASE_DEFAULT, &errorCode); |
|
142 return (UBool)(U_SUCCESS(errorCode) && |
|
143 0!=u_strCompare(nfd.getBuffer(), nfd.length(), |
|
144 dest, destLength, FALSE)); |
|
145 } |
|
146 } |
|
147 #endif |
|
148 |
|
149 #if UCONFIG_NO_NORMALIZATION |
|
150 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) { |
|
151 return FALSE; |
|
152 } |
|
153 #else |
|
154 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
155 UErrorCode errorCode=U_ZERO_ERROR; |
|
156 const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode); |
|
157 if(U_FAILURE(errorCode)) { |
|
158 return FALSE; |
|
159 } |
|
160 UnicodeString src(c); |
|
161 UnicodeString dest; |
|
162 { |
|
163 // The ReorderingBuffer must be in a block because its destructor |
|
164 // needs to release dest's buffer before we look at its contents. |
|
165 ReorderingBuffer buffer(*kcf, dest); |
|
166 // Small destCapacity for NFKC_CF(c). |
|
167 if(buffer.init(5, errorCode)) { |
|
168 const UChar *srcArray=src.getBuffer(); |
|
169 kcf->compose(srcArray, srcArray+src.length(), FALSE, |
|
170 TRUE, buffer, errorCode); |
|
171 } |
|
172 } |
|
173 return U_SUCCESS(errorCode) && dest!=src; |
|
174 } |
|
175 #endif |
|
176 |
|
177 #if UCONFIG_NO_NORMALIZATION |
|
178 static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) { |
|
179 return FALSE; |
|
180 } |
|
181 #else |
|
182 static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
183 UErrorCode errorCode=U_ZERO_ERROR; |
|
184 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); |
|
185 return |
|
186 U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) && |
|
187 impl->isCanonSegmentStarter(c); |
|
188 } |
|
189 #endif |
|
190 |
|
191 static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
192 return u_isalnumPOSIX(c); |
|
193 } |
|
194 |
|
195 static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
196 return u_isblank(c); |
|
197 } |
|
198 |
|
199 static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
200 return u_isgraphPOSIX(c); |
|
201 } |
|
202 |
|
203 static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
204 return u_isprintPOSIX(c); |
|
205 } |
|
206 |
|
207 static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
208 return u_isxdigit(c); |
|
209 } |
|
210 |
|
211 static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ |
|
212 /* |
|
213 * column and mask values for binary properties from u_getUnicodeProperties(). |
|
214 * Must be in order of corresponding UProperty, |
|
215 * and there must be exactly one entry per binary UProperty. |
|
216 * |
|
217 * Properties with mask==0 are handled in code. |
|
218 * For them, column is the UPropertySource value. |
|
219 */ |
|
220 { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains }, |
|
221 { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains }, |
|
222 { UPROPS_SRC_BIDI, 0, isBidiControl }, |
|
223 { UPROPS_SRC_BIDI, 0, isMirrored }, |
|
224 { 1, U_MASK(UPROPS_DASH), defaultContains }, |
|
225 { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains }, |
|
226 { 1, U_MASK(UPROPS_DEPRECATED), defaultContains }, |
|
227 { 1, U_MASK(UPROPS_DIACRITIC), defaultContains }, |
|
228 { 1, U_MASK(UPROPS_EXTENDER), defaultContains }, |
|
229 { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion }, |
|
230 { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains }, |
|
231 { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains }, |
|
232 { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains }, |
|
233 { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains }, |
|
234 { 1, U_MASK(UPROPS_HYPHEN), defaultContains }, |
|
235 { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains }, |
|
236 { 1, U_MASK(UPROPS_ID_START), defaultContains }, |
|
237 { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains }, |
|
238 { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains }, |
|
239 { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains }, |
|
240 { UPROPS_SRC_BIDI, 0, isJoinControl }, |
|
241 { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains }, |
|
242 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE |
|
243 { 1, U_MASK(UPROPS_MATH), defaultContains }, |
|
244 { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains }, |
|
245 { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains }, |
|
246 { 1, U_MASK(UPROPS_RADICAL), defaultContains }, |
|
247 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED |
|
248 { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains }, |
|
249 { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains }, |
|
250 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE |
|
251 { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains }, |
|
252 { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains }, |
|
253 { 1, U_MASK(UPROPS_XID_START), defaultContains }, |
|
254 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE |
|
255 { 1, U_MASK(UPROPS_S_TERM), defaultContains }, |
|
256 { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains }, |
|
257 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT |
|
258 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT |
|
259 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT |
|
260 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT |
|
261 { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter }, |
|
262 { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains }, |
|
263 { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains }, |
|
264 { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum }, |
|
265 { UPROPS_SRC_CHAR, 0, isPOSIX_blank }, |
|
266 { UPROPS_SRC_CHAR, 0, isPOSIX_graph }, |
|
267 { UPROPS_SRC_CHAR, 0, isPOSIX_print }, |
|
268 { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit }, |
|
269 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED |
|
270 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE |
|
271 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED |
|
272 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED |
|
273 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED |
|
274 { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded }, |
|
275 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED |
|
276 { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded } |
|
277 }; |
|
278 |
|
279 U_CAPI UBool U_EXPORT2 |
|
280 u_hasBinaryProperty(UChar32 c, UProperty which) { |
|
281 /* c is range-checked in the functions that are called from here */ |
|
282 if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) { |
|
283 /* not a known binary property */ |
|
284 return FALSE; |
|
285 } else { |
|
286 const BinaryProperty &prop=binProps[which]; |
|
287 return prop.contains(prop, c, which); |
|
288 } |
|
289 } |
|
290 |
|
291 struct IntProperty; |
|
292 |
|
293 typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which); |
|
294 typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which); |
|
295 |
|
296 struct IntProperty { |
|
297 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 |
|
298 uint32_t mask; |
|
299 int32_t shift; // =maxValue if getMaxValueFromShift() is used |
|
300 IntPropertyGetValue *getValue; |
|
301 IntPropertyGetMaxValue *getMaxValue; |
|
302 }; |
|
303 |
|
304 static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) { |
|
305 /* systematic, directly stored properties */ |
|
306 return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift; |
|
307 } |
|
308 |
|
309 static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) { |
|
310 return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift; |
|
311 } |
|
312 |
|
313 static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) { |
|
314 return prop.shift; |
|
315 } |
|
316 |
|
317 static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
318 return (int32_t)u_charDirection(c); |
|
319 } |
|
320 |
|
321 static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
322 return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c); |
|
323 } |
|
324 |
|
325 static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { |
|
326 return ubidi_getMaxValue(GET_BIDI_PROPS(), which); |
|
327 } |
|
328 |
|
329 #if UCONFIG_NO_NORMALIZATION |
|
330 static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) { |
|
331 return 0; |
|
332 } |
|
333 #else |
|
334 static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
335 return u_getCombiningClass(c); |
|
336 } |
|
337 #endif |
|
338 |
|
339 static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
340 return (int32_t)u_charType(c); |
|
341 } |
|
342 |
|
343 static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
344 return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); |
|
345 } |
|
346 |
|
347 static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
348 return ubidi_getJoiningType(GET_BIDI_PROPS(), c); |
|
349 } |
|
350 |
|
351 static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
352 int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c)); |
|
353 return UPROPS_NTV_GET_TYPE(ntv); |
|
354 } |
|
355 |
|
356 static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
357 UErrorCode errorCode=U_ZERO_ERROR; |
|
358 return (int32_t)uscript_getScript(c, &errorCode); |
|
359 } |
|
360 |
|
361 /* |
|
362 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. |
|
363 * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. |
|
364 */ |
|
365 static const UHangulSyllableType gcbToHst[]={ |
|
366 U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */ |
|
367 U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */ |
|
368 U_HST_NOT_APPLICABLE, /* U_GCB_CR */ |
|
369 U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */ |
|
370 U_HST_LEADING_JAMO, /* U_GCB_L */ |
|
371 U_HST_NOT_APPLICABLE, /* U_GCB_LF */ |
|
372 U_HST_LV_SYLLABLE, /* U_GCB_LV */ |
|
373 U_HST_LVT_SYLLABLE, /* U_GCB_LVT */ |
|
374 U_HST_TRAILING_JAMO, /* U_GCB_T */ |
|
375 U_HST_VOWEL_JAMO /* U_GCB_V */ |
|
376 /* |
|
377 * Omit GCB values beyond what we need for hst. |
|
378 * The code below checks for the array length. |
|
379 */ |
|
380 }; |
|
381 |
|
382 static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
383 /* see comments on gcbToHst[] above */ |
|
384 int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; |
|
385 if(gcb<LENGTHOF(gcbToHst)) { |
|
386 return gcbToHst[gcb]; |
|
387 } else { |
|
388 return U_HST_NOT_APPLICABLE; |
|
389 } |
|
390 } |
|
391 |
|
392 #if UCONFIG_NO_NORMALIZATION |
|
393 static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { |
|
394 return 0; |
|
395 } |
|
396 #else |
|
397 static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) { |
|
398 return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); |
|
399 } |
|
400 #endif |
|
401 |
|
402 #if UCONFIG_NO_NORMALIZATION |
|
403 static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) { |
|
404 return 0; |
|
405 } |
|
406 #else |
|
407 static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
408 return unorm_getFCD16(c)>>8; |
|
409 } |
|
410 #endif |
|
411 |
|
412 #if UCONFIG_NO_NORMALIZATION |
|
413 static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) { |
|
414 return 0; |
|
415 } |
|
416 #else |
|
417 static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
|
418 return unorm_getFCD16(c)&0xff; |
|
419 } |
|
420 #endif |
|
421 |
|
422 static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ |
|
423 /* |
|
424 * column, mask and shift values for int-value properties from u_getUnicodeProperties(). |
|
425 * Must be in order of corresponding UProperty, |
|
426 * and there must be exactly one entry per int UProperty. |
|
427 * |
|
428 * Properties with mask==0 are handled in code. |
|
429 * For them, column is the UPropertySource value. |
|
430 */ |
|
431 { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue }, |
|
432 { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue }, |
|
433 { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift }, |
|
434 { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue }, |
|
435 { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue }, |
|
436 { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift }, |
|
437 { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue }, |
|
438 { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue }, |
|
439 { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue }, |
|
440 { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift }, |
|
441 { 0, UPROPS_SCRIPT_MASK, 0, getScript, defaultGetMaxValue }, |
|
442 { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift }, |
|
443 // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" |
|
444 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, |
|
445 // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" |
|
446 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, |
|
447 // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE |
|
448 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, |
|
449 // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE |
|
450 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, |
|
451 { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift }, |
|
452 { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift }, |
|
453 { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue }, |
|
454 { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue }, |
|
455 { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue }, |
|
456 { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue }, |
|
457 }; |
|
458 |
|
459 U_CAPI int32_t U_EXPORT2 |
|
460 u_getIntPropertyValue(UChar32 c, UProperty which) { |
|
461 if(which<UCHAR_INT_START) { |
|
462 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { |
|
463 const BinaryProperty &prop=binProps[which]; |
|
464 return prop.contains(prop, c, which); |
|
465 } |
|
466 } else if(which<UCHAR_INT_LIMIT) { |
|
467 const IntProperty &prop=intProps[which-UCHAR_INT_START]; |
|
468 return prop.getValue(prop, c, which); |
|
469 } else if(which==UCHAR_GENERAL_CATEGORY_MASK) { |
|
470 return U_MASK(u_charType(c)); |
|
471 } |
|
472 return 0; // undefined |
|
473 } |
|
474 |
|
475 U_CAPI int32_t U_EXPORT2 |
|
476 u_getIntPropertyMinValue(UProperty /*which*/) { |
|
477 return 0; /* all binary/enum/int properties have a minimum value of 0 */ |
|
478 } |
|
479 |
|
480 U_CAPI int32_t U_EXPORT2 |
|
481 u_getIntPropertyMaxValue(UProperty which) { |
|
482 if(which<UCHAR_INT_START) { |
|
483 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { |
|
484 return 1; // maximum TRUE for all binary properties |
|
485 } |
|
486 } else if(which<UCHAR_INT_LIMIT) { |
|
487 const IntProperty &prop=intProps[which-UCHAR_INT_START]; |
|
488 return prop.getMaxValue(prop, which); |
|
489 } |
|
490 return -1; // undefined |
|
491 } |
|
492 |
|
493 U_CFUNC UPropertySource U_EXPORT2 |
|
494 uprops_getSource(UProperty which) { |
|
495 if(which<UCHAR_BINARY_START) { |
|
496 return UPROPS_SRC_NONE; /* undefined */ |
|
497 } else if(which<UCHAR_BINARY_LIMIT) { |
|
498 const BinaryProperty &prop=binProps[which]; |
|
499 if(prop.mask!=0) { |
|
500 return UPROPS_SRC_PROPSVEC; |
|
501 } else { |
|
502 return (UPropertySource)prop.column; |
|
503 } |
|
504 } else if(which<UCHAR_INT_START) { |
|
505 return UPROPS_SRC_NONE; /* undefined */ |
|
506 } else if(which<UCHAR_INT_LIMIT) { |
|
507 const IntProperty &prop=intProps[which-UCHAR_INT_START]; |
|
508 if(prop.mask!=0) { |
|
509 return UPROPS_SRC_PROPSVEC; |
|
510 } else { |
|
511 return (UPropertySource)prop.column; |
|
512 } |
|
513 } else if(which<UCHAR_STRING_START) { |
|
514 switch(which) { |
|
515 case UCHAR_GENERAL_CATEGORY_MASK: |
|
516 case UCHAR_NUMERIC_VALUE: |
|
517 return UPROPS_SRC_CHAR; |
|
518 |
|
519 default: |
|
520 return UPROPS_SRC_NONE; |
|
521 } |
|
522 } else if(which<UCHAR_STRING_LIMIT) { |
|
523 switch(which) { |
|
524 case UCHAR_AGE: |
|
525 return UPROPS_SRC_PROPSVEC; |
|
526 |
|
527 case UCHAR_BIDI_MIRRORING_GLYPH: |
|
528 return UPROPS_SRC_BIDI; |
|
529 |
|
530 case UCHAR_CASE_FOLDING: |
|
531 case UCHAR_LOWERCASE_MAPPING: |
|
532 case UCHAR_SIMPLE_CASE_FOLDING: |
|
533 case UCHAR_SIMPLE_LOWERCASE_MAPPING: |
|
534 case UCHAR_SIMPLE_TITLECASE_MAPPING: |
|
535 case UCHAR_SIMPLE_UPPERCASE_MAPPING: |
|
536 case UCHAR_TITLECASE_MAPPING: |
|
537 case UCHAR_UPPERCASE_MAPPING: |
|
538 return UPROPS_SRC_CASE; |
|
539 |
|
540 case UCHAR_ISO_COMMENT: |
|
541 case UCHAR_NAME: |
|
542 case UCHAR_UNICODE_1_NAME: |
|
543 return UPROPS_SRC_NAMES; |
|
544 |
|
545 default: |
|
546 return UPROPS_SRC_NONE; |
|
547 } |
|
548 } else { |
|
549 switch(which) { |
|
550 case UCHAR_SCRIPT_EXTENSIONS: |
|
551 return UPROPS_SRC_PROPSVEC; |
|
552 default: |
|
553 return UPROPS_SRC_NONE; /* undefined */ |
|
554 } |
|
555 } |
|
556 } |
|
557 |
|
558 #if !UCONFIG_NO_NORMALIZATION |
|
559 |
|
560 U_CAPI int32_t U_EXPORT2 |
|
561 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { |
|
562 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
563 return 0; |
|
564 } |
|
565 if(destCapacity<0 || (dest==NULL && destCapacity>0)) { |
|
566 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
567 return 0; |
|
568 } |
|
569 // Compute the FC_NFKC_Closure on the fly: |
|
570 // We have the API for complete coverage of Unicode properties, although |
|
571 // this value by itself is not useful via API. |
|
572 // (What could be useful is a custom normalization table that combines |
|
573 // case folding and NFKC.) |
|
574 // For the derivation, see Unicode's DerivedNormalizationProps.txt. |
|
575 const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode); |
|
576 const UCaseProps *csp=ucase_getSingleton(); |
|
577 if(U_FAILURE(*pErrorCode)) { |
|
578 return 0; |
|
579 } |
|
580 // first: b = NFKC(Fold(a)) |
|
581 UnicodeString folded1String; |
|
582 const UChar *folded1; |
|
583 int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFAULT); |
|
584 if(folded1Length<0) { |
|
585 const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); |
|
586 if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { |
|
587 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC |
|
588 } |
|
589 folded1String.setTo(c); |
|
590 } else { |
|
591 if(folded1Length>UCASE_MAX_STRING_LENGTH) { |
|
592 folded1String.setTo(folded1Length); |
|
593 } else { |
|
594 folded1String.setTo(FALSE, folded1, folded1Length); |
|
595 } |
|
596 } |
|
597 UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode); |
|
598 // second: c = NFKC(Fold(b)) |
|
599 UnicodeString folded2String(kc1); |
|
600 UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); |
|
601 // if (c != b) add the mapping from a to c |
|
602 if(U_FAILURE(*pErrorCode) || kc1==kc2) { |
|
603 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); |
|
604 } else { |
|
605 return kc2.extract(dest, destCapacity, *pErrorCode); |
|
606 } |
|
607 } |
|
608 |
|
609 #endif |