|
/*
*******************************************************************************
*
*   Copyright (C) 2009-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/
|
16 |
|
17 #include "unicode/utypes.h" |
|
18 |
|
19 #if !UCONFIG_NO_NORMALIZATION |
|
20 |
|
21 #include "unicode/localpointer.h" |
|
22 #include "unicode/normalizer2.h" |
|
23 #include "unicode/unistr.h" |
|
24 #include "unicode/unorm.h" |
|
25 #include "cpputils.h" |
|
26 #include "cstring.h" |
|
27 #include "mutex.h" |
|
28 #include "normalizer2impl.h" |
|
29 #include "uassert.h" |
|
30 #include "ucln_cmn.h" |
|
31 #include "uhash.h" |
|
32 |
|
33 U_NAMESPACE_BEGIN |
|
34 |
|
35 // Public API dispatch via Normalizer2 subclasses -------------------------- *** |
|
36 |
|
37 Normalizer2::~Normalizer2() {} |
|
38 |
|
39 UBool |
|
40 Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { |
|
41 return FALSE; |
|
42 } |
|
43 |
|
44 UChar32 |
|
45 Normalizer2::composePair(UChar32, UChar32) const { |
|
46 return U_SENTINEL; |
|
47 } |
|
48 |
|
49 uint8_t |
|
50 Normalizer2::getCombiningClass(UChar32 /*c*/) const { |
|
51 return 0; |
|
52 } |
|
53 |
|
54 // Normalizer2 implementation for the old UNORM_NONE. |
|
55 class NoopNormalizer2 : public Normalizer2 { |
|
56 virtual ~NoopNormalizer2(); |
|
57 |
|
58 virtual UnicodeString & |
|
59 normalize(const UnicodeString &src, |
|
60 UnicodeString &dest, |
|
61 UErrorCode &errorCode) const { |
|
62 if(U_SUCCESS(errorCode)) { |
|
63 if(&dest!=&src) { |
|
64 dest=src; |
|
65 } else { |
|
66 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
67 } |
|
68 } |
|
69 return dest; |
|
70 } |
|
71 virtual UnicodeString & |
|
72 normalizeSecondAndAppend(UnicodeString &first, |
|
73 const UnicodeString &second, |
|
74 UErrorCode &errorCode) const { |
|
75 if(U_SUCCESS(errorCode)) { |
|
76 if(&first!=&second) { |
|
77 first.append(second); |
|
78 } else { |
|
79 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
80 } |
|
81 } |
|
82 return first; |
|
83 } |
|
84 virtual UnicodeString & |
|
85 append(UnicodeString &first, |
|
86 const UnicodeString &second, |
|
87 UErrorCode &errorCode) const { |
|
88 if(U_SUCCESS(errorCode)) { |
|
89 if(&first!=&second) { |
|
90 first.append(second); |
|
91 } else { |
|
92 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
93 } |
|
94 } |
|
95 return first; |
|
96 } |
|
97 virtual UBool |
|
98 getDecomposition(UChar32, UnicodeString &) const { |
|
99 return FALSE; |
|
100 } |
|
101 // No need to override the default getRawDecomposition(). |
|
102 virtual UBool |
|
103 isNormalized(const UnicodeString &, UErrorCode &) const { |
|
104 return TRUE; |
|
105 } |
|
106 virtual UNormalizationCheckResult |
|
107 quickCheck(const UnicodeString &, UErrorCode &) const { |
|
108 return UNORM_YES; |
|
109 } |
|
110 virtual int32_t |
|
111 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { |
|
112 return s.length(); |
|
113 } |
|
114 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } |
|
115 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } |
|
116 virtual UBool isInert(UChar32) const { return TRUE; } |
|
117 }; |
|
118 |
|
119 NoopNormalizer2::~NoopNormalizer2() {} |
|
120 |
|
121 // Intermediate class: |
|
122 // Has Normalizer2Impl and does boilerplate argument checking and setup. |
|
123 class Normalizer2WithImpl : public Normalizer2 { |
|
124 public: |
|
125 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} |
|
126 virtual ~Normalizer2WithImpl(); |
|
127 |
|
128 // normalize |
|
129 virtual UnicodeString & |
|
130 normalize(const UnicodeString &src, |
|
131 UnicodeString &dest, |
|
132 UErrorCode &errorCode) const { |
|
133 if(U_FAILURE(errorCode)) { |
|
134 dest.setToBogus(); |
|
135 return dest; |
|
136 } |
|
137 const UChar *sArray=src.getBuffer(); |
|
138 if(&dest==&src || sArray==NULL) { |
|
139 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
140 dest.setToBogus(); |
|
141 return dest; |
|
142 } |
|
143 dest.remove(); |
|
144 ReorderingBuffer buffer(impl, dest); |
|
145 if(buffer.init(src.length(), errorCode)) { |
|
146 normalize(sArray, sArray+src.length(), buffer, errorCode); |
|
147 } |
|
148 return dest; |
|
149 } |
|
150 virtual void |
|
151 normalize(const UChar *src, const UChar *limit, |
|
152 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; |
|
153 |
|
154 // normalize and append |
|
155 virtual UnicodeString & |
|
156 normalizeSecondAndAppend(UnicodeString &first, |
|
157 const UnicodeString &second, |
|
158 UErrorCode &errorCode) const { |
|
159 return normalizeSecondAndAppend(first, second, TRUE, errorCode); |
|
160 } |
|
161 virtual UnicodeString & |
|
162 append(UnicodeString &first, |
|
163 const UnicodeString &second, |
|
164 UErrorCode &errorCode) const { |
|
165 return normalizeSecondAndAppend(first, second, FALSE, errorCode); |
|
166 } |
|
167 UnicodeString & |
|
168 normalizeSecondAndAppend(UnicodeString &first, |
|
169 const UnicodeString &second, |
|
170 UBool doNormalize, |
|
171 UErrorCode &errorCode) const { |
|
172 uprv_checkCanGetBuffer(first, errorCode); |
|
173 if(U_FAILURE(errorCode)) { |
|
174 return first; |
|
175 } |
|
176 const UChar *secondArray=second.getBuffer(); |
|
177 if(&first==&second || secondArray==NULL) { |
|
178 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
179 return first; |
|
180 } |
|
181 int32_t firstLength=first.length(); |
|
182 UnicodeString safeMiddle; |
|
183 { |
|
184 ReorderingBuffer buffer(impl, first); |
|
185 if(buffer.init(firstLength+second.length(), errorCode)) { |
|
186 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, |
|
187 safeMiddle, buffer, errorCode); |
|
188 } |
|
189 } // The ReorderingBuffer destructor finalizes the first string. |
|
190 if(U_FAILURE(errorCode)) { |
|
191 // Restore the modified suffix of the first string. |
|
192 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); |
|
193 } |
|
194 return first; |
|
195 } |
|
196 virtual void |
|
197 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
|
198 UnicodeString &safeMiddle, |
|
199 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; |
|
200 virtual UBool |
|
201 getDecomposition(UChar32 c, UnicodeString &decomposition) const { |
|
202 UChar buffer[4]; |
|
203 int32_t length; |
|
204 const UChar *d=impl.getDecomposition(c, buffer, length); |
|
205 if(d==NULL) { |
|
206 return FALSE; |
|
207 } |
|
208 if(d==buffer) { |
|
209 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) |
|
210 } else { |
|
211 decomposition.setTo(FALSE, d, length); // read-only alias |
|
212 } |
|
213 return TRUE; |
|
214 } |
|
215 virtual UBool |
|
216 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { |
|
217 UChar buffer[30]; |
|
218 int32_t length; |
|
219 const UChar *d=impl.getRawDecomposition(c, buffer, length); |
|
220 if(d==NULL) { |
|
221 return FALSE; |
|
222 } |
|
223 if(d==buffer) { |
|
224 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) |
|
225 } else { |
|
226 decomposition.setTo(FALSE, d, length); // read-only alias |
|
227 } |
|
228 return TRUE; |
|
229 } |
|
230 virtual UChar32 |
|
231 composePair(UChar32 a, UChar32 b) const { |
|
232 return impl.composePair(a, b); |
|
233 } |
|
234 |
|
235 virtual uint8_t |
|
236 getCombiningClass(UChar32 c) const { |
|
237 return impl.getCC(impl.getNorm16(c)); |
|
238 } |
|
239 |
|
240 // quick checks |
|
241 virtual UBool |
|
242 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { |
|
243 if(U_FAILURE(errorCode)) { |
|
244 return FALSE; |
|
245 } |
|
246 const UChar *sArray=s.getBuffer(); |
|
247 if(sArray==NULL) { |
|
248 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
249 return FALSE; |
|
250 } |
|
251 const UChar *sLimit=sArray+s.length(); |
|
252 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); |
|
253 } |
|
254 virtual UNormalizationCheckResult |
|
255 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { |
|
256 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; |
|
257 } |
|
258 virtual int32_t |
|
259 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { |
|
260 if(U_FAILURE(errorCode)) { |
|
261 return 0; |
|
262 } |
|
263 const UChar *sArray=s.getBuffer(); |
|
264 if(sArray==NULL) { |
|
265 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
266 return 0; |
|
267 } |
|
268 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); |
|
269 } |
|
270 virtual const UChar * |
|
271 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; |
|
272 |
|
273 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { |
|
274 return UNORM_YES; |
|
275 } |
|
276 |
|
277 const Normalizer2Impl &impl; |
|
278 }; |
|
279 |
|
280 Normalizer2WithImpl::~Normalizer2WithImpl() {} |
|
281 |
|
282 class DecomposeNormalizer2 : public Normalizer2WithImpl { |
|
283 public: |
|
284 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} |
|
285 virtual ~DecomposeNormalizer2(); |
|
286 |
|
287 private: |
|
288 virtual void |
|
289 normalize(const UChar *src, const UChar *limit, |
|
290 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
|
291 impl.decompose(src, limit, &buffer, errorCode); |
|
292 } |
|
293 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
|
294 virtual void |
|
295 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
|
296 UnicodeString &safeMiddle, |
|
297 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
|
298 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); |
|
299 } |
|
300 virtual const UChar * |
|
301 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { |
|
302 return impl.decompose(src, limit, NULL, errorCode); |
|
303 } |
|
304 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
|
305 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { |
|
306 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; |
|
307 } |
|
308 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } |
|
309 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } |
|
310 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } |
|
311 }; |
|
312 |
|
313 DecomposeNormalizer2::~DecomposeNormalizer2() {} |
|
314 |
|
315 class ComposeNormalizer2 : public Normalizer2WithImpl { |
|
316 public: |
|
317 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : |
|
318 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} |
|
319 virtual ~ComposeNormalizer2(); |
|
320 |
|
321 private: |
|
322 virtual void |
|
323 normalize(const UChar *src, const UChar *limit, |
|
324 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
|
325 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); |
|
326 } |
|
327 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
|
328 virtual void |
|
329 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
|
330 UnicodeString &safeMiddle, |
|
331 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
|
332 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); |
|
333 } |
|
334 |
|
335 virtual UBool |
|
336 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { |
|
337 if(U_FAILURE(errorCode)) { |
|
338 return FALSE; |
|
339 } |
|
340 const UChar *sArray=s.getBuffer(); |
|
341 if(sArray==NULL) { |
|
342 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
343 return FALSE; |
|
344 } |
|
345 UnicodeString temp; |
|
346 ReorderingBuffer buffer(impl, temp); |
|
347 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization |
|
348 return FALSE; |
|
349 } |
|
350 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); |
|
351 } |
|
352 virtual UNormalizationCheckResult |
|
353 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { |
|
354 if(U_FAILURE(errorCode)) { |
|
355 return UNORM_MAYBE; |
|
356 } |
|
357 const UChar *sArray=s.getBuffer(); |
|
358 if(sArray==NULL) { |
|
359 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
360 return UNORM_MAYBE; |
|
361 } |
|
362 UNormalizationCheckResult qcResult=UNORM_YES; |
|
363 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); |
|
364 return qcResult; |
|
365 } |
|
366 virtual const UChar * |
|
367 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { |
|
368 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); |
|
369 } |
|
370 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
|
371 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { |
|
372 return impl.getCompQuickCheck(impl.getNorm16(c)); |
|
373 } |
|
374 virtual UBool hasBoundaryBefore(UChar32 c) const { |
|
375 return impl.hasCompBoundaryBefore(c); |
|
376 } |
|
377 virtual UBool hasBoundaryAfter(UChar32 c) const { |
|
378 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); |
|
379 } |
|
380 virtual UBool isInert(UChar32 c) const { |
|
381 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); |
|
382 } |
|
383 |
|
384 const UBool onlyContiguous; |
|
385 }; |
|
386 |
|
387 ComposeNormalizer2::~ComposeNormalizer2() {} |
|
388 |
|
389 class FCDNormalizer2 : public Normalizer2WithImpl { |
|
390 public: |
|
391 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} |
|
392 virtual ~FCDNormalizer2(); |
|
393 |
|
394 private: |
|
395 virtual void |
|
396 normalize(const UChar *src, const UChar *limit, |
|
397 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
|
398 impl.makeFCD(src, limit, &buffer, errorCode); |
|
399 } |
|
400 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
|
401 virtual void |
|
402 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
|
403 UnicodeString &safeMiddle, |
|
404 ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
|
405 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); |
|
406 } |
|
407 virtual const UChar * |
|
408 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { |
|
409 return impl.makeFCD(src, limit, NULL, errorCode); |
|
410 } |
|
411 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
|
412 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } |
|
413 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } |
|
414 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } |
|
415 }; |
|
416 |
|
417 FCDNormalizer2::~FCDNormalizer2() {} |
|
418 |
|
419 // instance cache ---------------------------------------------------------- *** |
|
420 |
|
421 struct Norm2AllModes : public UMemory { |
|
422 static Norm2AllModes *createInstance(const char *packageName, |
|
423 const char *name, |
|
424 UErrorCode &errorCode); |
|
425 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} |
|
426 |
|
427 Normalizer2Impl impl; |
|
428 ComposeNormalizer2 comp; |
|
429 DecomposeNormalizer2 decomp; |
|
430 FCDNormalizer2 fcd; |
|
431 ComposeNormalizer2 fcc; |
|
432 }; |
|
433 |
|
434 Norm2AllModes * |
|
435 Norm2AllModes::createInstance(const char *packageName, |
|
436 const char *name, |
|
437 UErrorCode &errorCode) { |
|
438 if(U_FAILURE(errorCode)) { |
|
439 return NULL; |
|
440 } |
|
441 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); |
|
442 if(allModes.isNull()) { |
|
443 errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
444 return NULL; |
|
445 } |
|
446 allModes->impl.load(packageName, name, errorCode); |
|
447 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; |
|
448 } |
|
449 |
|
450 U_CDECL_BEGIN |
|
451 static UBool U_CALLCONV uprv_normalizer2_cleanup(); |
|
452 U_CDECL_END |
|
453 |
|
454 |
|
455 static Norm2AllModes *nfcSingleton; |
|
456 static Norm2AllModes *nfkcSingleton; |
|
457 static Norm2AllModes *nfkc_cfSingleton; |
|
458 static Normalizer2 *noopSingleton; |
|
459 static UHashtable *cache=NULL; |
|
460 |
|
461 static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; |
|
462 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; |
|
463 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; |
|
464 static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; |
|
465 |
|
466 // UInitOnce singleton initialization function |
|
467 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { |
|
468 if (uprv_strcmp(what, "nfc") == 0) { |
|
469 nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode); |
|
470 } else if (uprv_strcmp(what, "nfkc") == 0) { |
|
471 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); |
|
472 } else if (uprv_strcmp(what, "nfkc_cf") == 0) { |
|
473 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode); |
|
474 } else if (uprv_strcmp(what, "noop") == 0) { |
|
475 noopSingleton = new NoopNormalizer2; |
|
476 } else { |
|
477 U_ASSERT(FALSE); // Unknown singleton |
|
478 } |
|
479 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); |
|
480 } |
|
481 |
|
482 U_CDECL_BEGIN |
|
483 |
|
484 static void U_CALLCONV deleteNorm2AllModes(void *allModes) { |
|
485 delete (Norm2AllModes *)allModes; |
|
486 } |
|
487 |
|
488 static UBool U_CALLCONV uprv_normalizer2_cleanup() { |
|
489 delete nfcSingleton; |
|
490 nfcSingleton = NULL; |
|
491 delete nfkcSingleton; |
|
492 nfkcSingleton = NULL; |
|
493 delete nfkc_cfSingleton; |
|
494 nfkc_cfSingleton = NULL; |
|
495 delete noopSingleton; |
|
496 noopSingleton = NULL; |
|
497 uhash_close(cache); |
|
498 cache=NULL; |
|
499 nfcInitOnce.reset(); |
|
500 nfkcInitOnce.reset(); |
|
501 nfkc_cfInitOnce.reset(); |
|
502 noopInitOnce.reset(); |
|
503 return TRUE; |
|
504 } |
|
505 |
|
506 U_CDECL_END |
|
507 |
|
508 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { |
|
509 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); |
|
510 return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL; |
|
511 } |
|
512 |
|
513 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { |
|
514 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); |
|
515 return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL; |
|
516 } |
|
517 |
|
518 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { |
|
519 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); |
|
520 return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL; |
|
521 } |
|
522 |
|
523 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { |
|
524 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); |
|
525 return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL; |
|
526 } |
|
527 |
|
528 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { |
|
529 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); |
|
530 return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL; |
|
531 } |
|
532 |
|
533 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { |
|
534 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); |
|
535 return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL; |
|
536 } |
|
537 |
|
538 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { |
|
539 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); |
|
540 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL; |
|
541 } |
|
542 |
|
543 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { |
|
544 umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode); |
|
545 return noopSingleton; |
|
546 } |
|
547 |
|
548 const Normalizer2 * |
|
549 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { |
|
550 if(U_FAILURE(errorCode)) { |
|
551 return NULL; |
|
552 } |
|
553 switch(mode) { |
|
554 case UNORM_NFD: |
|
555 return getNFDInstance(errorCode); |
|
556 case UNORM_NFKD: |
|
557 return getNFKDInstance(errorCode); |
|
558 case UNORM_NFC: |
|
559 return getNFCInstance(errorCode); |
|
560 case UNORM_NFKC: |
|
561 return getNFKCInstance(errorCode); |
|
562 case UNORM_FCD: |
|
563 return getFCDInstance(errorCode); |
|
564 default: // UNORM_NONE |
|
565 return getNoopInstance(errorCode); |
|
566 } |
|
567 } |
|
568 |
|
569 const Normalizer2Impl * |
|
570 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { |
|
571 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); |
|
572 return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL; |
|
573 } |
|
574 |
|
575 const Normalizer2Impl * |
|
576 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { |
|
577 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); |
|
578 return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL; |
|
579 } |
|
580 |
|
581 const Normalizer2Impl * |
|
582 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { |
|
583 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); |
|
584 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL; |
|
585 } |
|
586 |
|
587 const Normalizer2Impl * |
|
588 Normalizer2Factory::getImpl(const Normalizer2 *norm2) { |
|
589 return &((Normalizer2WithImpl *)norm2)->impl; |
|
590 } |
|
591 |
|
592 const Normalizer2 * |
|
593 Normalizer2::getNFCInstance(UErrorCode &errorCode) { |
|
594 return Normalizer2Factory::getNFCInstance(errorCode); |
|
595 } |
|
596 |
|
597 const Normalizer2 * |
|
598 Normalizer2::getNFDInstance(UErrorCode &errorCode) { |
|
599 return Normalizer2Factory::getNFDInstance(errorCode); |
|
600 } |
|
601 |
|
602 const Normalizer2 * |
|
603 Normalizer2::getNFKCInstance(UErrorCode &errorCode) { |
|
604 return Normalizer2Factory::getNFKCInstance(errorCode); |
|
605 } |
|
606 |
|
607 const Normalizer2 * |
|
608 Normalizer2::getNFKDInstance(UErrorCode &errorCode) { |
|
609 return Normalizer2Factory::getNFKDInstance(errorCode); |
|
610 } |
|
611 |
|
612 const Normalizer2 * |
|
613 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { |
|
614 return Normalizer2Factory::getNFKC_CFInstance(errorCode); |
|
615 } |
|
616 |
|
617 const Normalizer2 * |
|
618 Normalizer2::getInstance(const char *packageName, |
|
619 const char *name, |
|
620 UNormalization2Mode mode, |
|
621 UErrorCode &errorCode) { |
|
622 if(U_FAILURE(errorCode)) { |
|
623 return NULL; |
|
624 } |
|
625 if(name==NULL || *name==0) { |
|
626 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
627 return NULL; |
|
628 } |
|
629 Norm2AllModes *allModes=NULL; |
|
630 if(packageName==NULL) { |
|
631 if(0==uprv_strcmp(name, "nfc")) { |
|
632 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); |
|
633 allModes=nfcSingleton; |
|
634 } else if(0==uprv_strcmp(name, "nfkc")) { |
|
635 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); |
|
636 allModes=nfkcSingleton; |
|
637 } else if(0==uprv_strcmp(name, "nfkc_cf")) { |
|
638 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); |
|
639 allModes=nfkc_cfSingleton; |
|
640 } |
|
641 } |
|
642 if(allModes==NULL && U_SUCCESS(errorCode)) { |
|
643 { |
|
644 Mutex lock; |
|
645 if(cache!=NULL) { |
|
646 allModes=(Norm2AllModes *)uhash_get(cache, name); |
|
647 } |
|
648 } |
|
649 if(allModes==NULL) { |
|
650 LocalPointer<Norm2AllModes> localAllModes( |
|
651 Norm2AllModes::createInstance(packageName, name, errorCode)); |
|
652 if(U_SUCCESS(errorCode)) { |
|
653 Mutex lock; |
|
654 if(cache==NULL) { |
|
655 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); |
|
656 if(U_FAILURE(errorCode)) { |
|
657 return NULL; |
|
658 } |
|
659 uhash_setKeyDeleter(cache, uprv_free); |
|
660 uhash_setValueDeleter(cache, deleteNorm2AllModes); |
|
661 } |
|
662 void *temp=uhash_get(cache, name); |
|
663 if(temp==NULL) { |
|
664 int32_t keyLength=uprv_strlen(name)+1; |
|
665 char *nameCopy=(char *)uprv_malloc(keyLength); |
|
666 if(nameCopy==NULL) { |
|
667 errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
668 return NULL; |
|
669 } |
|
670 uprv_memcpy(nameCopy, name, keyLength); |
|
671 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); |
|
672 } else { |
|
673 // race condition |
|
674 allModes=(Norm2AllModes *)temp; |
|
675 } |
|
676 } |
|
677 } |
|
678 } |
|
679 if(allModes!=NULL && U_SUCCESS(errorCode)) { |
|
680 switch(mode) { |
|
681 case UNORM2_COMPOSE: |
|
682 return &allModes->comp; |
|
683 case UNORM2_DECOMPOSE: |
|
684 return &allModes->decomp; |
|
685 case UNORM2_FCD: |
|
686 return &allModes->fcd; |
|
687 case UNORM2_COMPOSE_CONTIGUOUS: |
|
688 return &allModes->fcc; |
|
689 default: |
|
690 break; // do nothing |
|
691 } |
|
692 } |
|
693 return NULL; |
|
694 } |
|
695 |
|
696 U_NAMESPACE_END |
|
697 |
|
698 // C API ------------------------------------------------------------------- *** |
|
699 |
|
700 U_NAMESPACE_USE |
|
701 |
|
702 U_CAPI const UNormalizer2 * U_EXPORT2 |
|
703 unorm2_getNFCInstance(UErrorCode *pErrorCode) { |
|
704 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); |
|
705 } |
|
706 |
|
707 U_CAPI const UNormalizer2 * U_EXPORT2 |
|
708 unorm2_getNFDInstance(UErrorCode *pErrorCode) { |
|
709 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); |
|
710 } |
|
711 |
|
712 U_CAPI const UNormalizer2 * U_EXPORT2 |
|
713 unorm2_getNFKCInstance(UErrorCode *pErrorCode) { |
|
714 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); |
|
715 } |
|
716 |
|
717 U_CAPI const UNormalizer2 * U_EXPORT2 |
|
718 unorm2_getNFKDInstance(UErrorCode *pErrorCode) { |
|
719 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); |
|
720 } |
|
721 |
|
722 U_CAPI const UNormalizer2 * U_EXPORT2 |
|
723 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { |
|
724 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); |
|
725 } |
|
726 |
|
727 U_CAPI const UNormalizer2 * U_EXPORT2 |
|
728 unorm2_getInstance(const char *packageName, |
|
729 const char *name, |
|
730 UNormalization2Mode mode, |
|
731 UErrorCode *pErrorCode) { |
|
732 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); |
|
733 } |
|
734 |
|
735 U_CAPI void U_EXPORT2 |
|
736 unorm2_close(UNormalizer2 *norm2) { |
|
737 delete (Normalizer2 *)norm2; |
|
738 } |
|
739 |
|
740 U_CAPI int32_t U_EXPORT2 |
|
741 unorm2_normalize(const UNormalizer2 *norm2, |
|
742 const UChar *src, int32_t length, |
|
743 UChar *dest, int32_t capacity, |
|
744 UErrorCode *pErrorCode) { |
|
745 if(U_FAILURE(*pErrorCode)) { |
|
746 return 0; |
|
747 } |
|
748 if( (src==NULL ? length!=0 : length<-1) || |
|
749 (dest==NULL ? capacity!=0 : capacity<0) || |
|
750 (src==dest && src!=NULL) |
|
751 ) { |
|
752 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
753 return 0; |
|
754 } |
|
755 UnicodeString destString(dest, 0, capacity); |
|
756 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. |
|
757 if(length!=0) { |
|
758 const Normalizer2 *n2=(const Normalizer2 *)norm2; |
|
759 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); |
|
760 if(n2wi!=NULL) { |
|
761 // Avoid duplicate argument checking and support NUL-terminated src. |
|
762 ReorderingBuffer buffer(n2wi->impl, destString); |
|
763 if(buffer.init(length, *pErrorCode)) { |
|
764 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); |
|
765 } |
|
766 } else { |
|
767 UnicodeString srcString(length<0, src, length); |
|
768 n2->normalize(srcString, destString, *pErrorCode); |
|
769 } |
|
770 } |
|
771 return destString.extract(dest, capacity, *pErrorCode); |
|
772 } |
|
773 |
|
774 static int32_t |
|
775 normalizeSecondAndAppend(const UNormalizer2 *norm2, |
|
776 UChar *first, int32_t firstLength, int32_t firstCapacity, |
|
777 const UChar *second, int32_t secondLength, |
|
778 UBool doNormalize, |
|
779 UErrorCode *pErrorCode) { |
|
780 if(U_FAILURE(*pErrorCode)) { |
|
781 return 0; |
|
782 } |
|
783 if( (second==NULL ? secondLength!=0 : secondLength<-1) || |
|
784 (first==NULL ? (firstCapacity!=0 || firstLength!=0) : |
|
785 (firstCapacity<0 || firstLength<-1)) || |
|
786 (first==second && first!=NULL) |
|
787 ) { |
|
788 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
789 return 0; |
|
790 } |
|
791 UnicodeString firstString(first, firstLength, firstCapacity); |
|
792 firstLength=firstString.length(); // In case it was -1. |
|
793 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. |
|
794 if(secondLength!=0) { |
|
795 const Normalizer2 *n2=(const Normalizer2 *)norm2; |
|
796 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); |
|
797 if(n2wi!=NULL) { |
|
798 // Avoid duplicate argument checking and support NUL-terminated src. |
|
799 UnicodeString safeMiddle; |
|
800 { |
|
801 ReorderingBuffer buffer(n2wi->impl, firstString); |
|
802 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 |
|
803 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, |
|
804 doNormalize, safeMiddle, buffer, *pErrorCode); |
|
805 } |
|
806 } // The ReorderingBuffer destructor finalizes firstString. |
|
807 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { |
|
808 // Restore the modified suffix of the first string. |
|
809 // This does not restore first[] array contents between firstLength and firstCapacity. |
|
810 // (That might be uninitialized memory, as far as we know.) |
|
811 if(first!=NULL) { /* don't dereference NULL */ |
|
812 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); |
|
813 if(firstLength<firstCapacity) { |
|
814 first[firstLength]=0; // NUL-terminate in case it was originally. |
|
815 } |
|
816 } |
|
817 } |
|
818 } else { |
|
819 UnicodeString secondString(secondLength<0, second, secondLength); |
|
820 if(doNormalize) { |
|
821 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); |
|
822 } else { |
|
823 n2->append(firstString, secondString, *pErrorCode); |
|
824 } |
|
825 } |
|
826 } |
|
827 return firstString.extract(first, firstCapacity, *pErrorCode); |
|
828 } |
|
829 |
|
830 U_CAPI int32_t U_EXPORT2 |
|
831 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, |
|
832 UChar *first, int32_t firstLength, int32_t firstCapacity, |
|
833 const UChar *second, int32_t secondLength, |
|
834 UErrorCode *pErrorCode) { |
|
835 return normalizeSecondAndAppend(norm2, |
|
836 first, firstLength, firstCapacity, |
|
837 second, secondLength, |
|
838 TRUE, pErrorCode); |
|
839 } |
|
840 |
|
841 U_CAPI int32_t U_EXPORT2 |
|
842 unorm2_append(const UNormalizer2 *norm2, |
|
843 UChar *first, int32_t firstLength, int32_t firstCapacity, |
|
844 const UChar *second, int32_t secondLength, |
|
845 UErrorCode *pErrorCode) { |
|
846 return normalizeSecondAndAppend(norm2, |
|
847 first, firstLength, firstCapacity, |
|
848 second, secondLength, |
|
849 FALSE, pErrorCode); |
|
850 } |
|
851 |
|
852 U_CAPI int32_t U_EXPORT2 |
|
853 unorm2_getDecomposition(const UNormalizer2 *norm2, |
|
854 UChar32 c, UChar *decomposition, int32_t capacity, |
|
855 UErrorCode *pErrorCode) { |
|
856 if(U_FAILURE(*pErrorCode)) { |
|
857 return 0; |
|
858 } |
|
859 if(decomposition==NULL ? capacity!=0 : capacity<0) { |
|
860 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
861 return 0; |
|
862 } |
|
863 UnicodeString destString(decomposition, 0, capacity); |
|
864 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) { |
|
865 return destString.extract(decomposition, capacity, *pErrorCode); |
|
866 } else { |
|
867 return -1; |
|
868 } |
|
869 } |
|
870 |
|
871 U_CAPI int32_t U_EXPORT2 |
|
872 unorm2_getRawDecomposition(const UNormalizer2 *norm2, |
|
873 UChar32 c, UChar *decomposition, int32_t capacity, |
|
874 UErrorCode *pErrorCode) { |
|
875 if(U_FAILURE(*pErrorCode)) { |
|
876 return 0; |
|
877 } |
|
878 if(decomposition==NULL ? capacity!=0 : capacity<0) { |
|
879 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
880 return 0; |
|
881 } |
|
882 UnicodeString destString(decomposition, 0, capacity); |
|
883 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) { |
|
884 return destString.extract(decomposition, capacity, *pErrorCode); |
|
885 } else { |
|
886 return -1; |
|
887 } |
|
888 } |
|
889 |
|
890 U_CAPI UChar32 U_EXPORT2 |
|
891 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { |
|
892 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b); |
|
893 } |
|
894 |
|
895 U_CAPI uint8_t U_EXPORT2 |
|
896 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { |
|
897 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c); |
|
898 } |
|
899 |
|
900 U_CAPI UBool U_EXPORT2 |
|
901 unorm2_isNormalized(const UNormalizer2 *norm2, |
|
902 const UChar *s, int32_t length, |
|
903 UErrorCode *pErrorCode) { |
|
904 if(U_FAILURE(*pErrorCode)) { |
|
905 return 0; |
|
906 } |
|
907 if((s==NULL && length!=0) || length<-1) { |
|
908 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
909 return 0; |
|
910 } |
|
911 UnicodeString sString(length<0, s, length); |
|
912 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); |
|
913 } |
|
914 |
|
915 U_CAPI UNormalizationCheckResult U_EXPORT2 |
|
916 unorm2_quickCheck(const UNormalizer2 *norm2, |
|
917 const UChar *s, int32_t length, |
|
918 UErrorCode *pErrorCode) { |
|
919 if(U_FAILURE(*pErrorCode)) { |
|
920 return UNORM_NO; |
|
921 } |
|
922 if((s==NULL && length!=0) || length<-1) { |
|
923 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
924 return UNORM_NO; |
|
925 } |
|
926 UnicodeString sString(length<0, s, length); |
|
927 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); |
|
928 } |
|
929 |
|
930 U_CAPI int32_t U_EXPORT2 |
|
931 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, |
|
932 const UChar *s, int32_t length, |
|
933 UErrorCode *pErrorCode) { |
|
934 if(U_FAILURE(*pErrorCode)) { |
|
935 return 0; |
|
936 } |
|
937 if((s==NULL && length!=0) || length<-1) { |
|
938 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
939 return 0; |
|
940 } |
|
941 UnicodeString sString(length<0, s, length); |
|
942 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); |
|
943 } |
|
944 |
|
945 U_CAPI UBool U_EXPORT2 |
|
946 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { |
|
947 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); |
|
948 } |
|
949 |
|
950 U_CAPI UBool U_EXPORT2 |
|
951 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { |
|
952 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); |
|
953 } |
|
954 |
|
955 U_CAPI UBool U_EXPORT2 |
|
956 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { |
|
957 return ((const Normalizer2 *)norm2)->isInert(c); |
|
958 } |
|
959 |
|
960 // Some properties APIs ---------------------------------------------------- *** |
|
961 |
|
962 U_CAPI uint8_t U_EXPORT2 |
|
963 u_getCombiningClass(UChar32 c) { |
|
964 UErrorCode errorCode=U_ZERO_ERROR; |
|
965 const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode); |
|
966 if(U_SUCCESS(errorCode)) { |
|
967 return nfd->getCombiningClass(c); |
|
968 } else { |
|
969 return 0; |
|
970 } |
|
971 } |
|
972 |
|
973 U_CFUNC UNormalizationCheckResult |
|
974 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { |
|
975 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { |
|
976 return UNORM_YES; |
|
977 } |
|
978 UErrorCode errorCode=U_ZERO_ERROR; |
|
979 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); |
|
980 if(U_SUCCESS(errorCode)) { |
|
981 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); |
|
982 } else { |
|
983 return UNORM_MAYBE; |
|
984 } |
|
985 } |
|
986 |
|
987 U_CFUNC uint16_t |
|
988 unorm_getFCD16(UChar32 c) { |
|
989 UErrorCode errorCode=U_ZERO_ERROR; |
|
990 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); |
|
991 if(U_SUCCESS(errorCode)) { |
|
992 return impl->getFCD16(c); |
|
993 } else { |
|
994 return 0; |
|
995 } |
|
996 } |
|
997 |
|
998 #endif // !UCONFIG_NO_NORMALIZATION |