|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 1997-2013, International Business Machines Corporation and |
|
4 * others. All Rights Reserved. |
|
5 ******************************************************************************* |
|
6 * |
|
7 * File TXTBDRY.CPP |
|
8 * |
|
9 * Modification History: |
|
10 * |
|
11 * Date Name Description |
|
12 * 02/18/97 aliu Converted from OpenClass. Added DONE. |
|
13 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. |
|
14 ***************************************************************************************** |
|
15 */ |
|
16 |
|
17 // ***************************************************************************** |
|
18 // This file was generated from the java source file BreakIterator.java |
|
19 // ***************************************************************************** |
|
20 |
|
21 #include "unicode/utypes.h" |
|
22 |
|
23 #if !UCONFIG_NO_BREAK_ITERATION |
|
24 |
|
25 #include "unicode/rbbi.h" |
|
26 #include "unicode/brkiter.h" |
|
27 #include "unicode/udata.h" |
|
28 #include "unicode/ures.h" |
|
29 #include "unicode/ustring.h" |
|
30 #include "ucln_cmn.h" |
|
31 #include "cstring.h" |
|
32 #include "umutex.h" |
|
33 #include "servloc.h" |
|
34 #include "locbased.h" |
|
35 #include "uresimp.h" |
|
36 #include "uassert.h" |
|
37 #include "ubrkimpl.h" |
|
38 |
|
39 // ***************************************************************************** |
|
40 // class BreakIterator |
|
41 // This class implements methods for finding the location of boundaries in text. |
|
42 // Instances of BreakIterator maintain a current position and scan over text |
|
43 // returning the index of characters where boundaries occur. |
|
44 // ***************************************************************************** |
|
45 |
|
46 U_NAMESPACE_BEGIN |
|
47 |
|
48 // ------------------------------------- |
|
49 |
|
50 BreakIterator* |
|
51 BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) |
|
52 { |
|
53 char fnbuff[256]; |
|
54 char ext[4]={'\0'}; |
|
55 char actualLocale[ULOC_FULLNAME_CAPACITY]; |
|
56 int32_t size; |
|
57 const UChar* brkfname = NULL; |
|
58 UResourceBundle brkRulesStack; |
|
59 UResourceBundle brkNameStack; |
|
60 UResourceBundle *brkRules = &brkRulesStack; |
|
61 UResourceBundle *brkName = &brkNameStack; |
|
62 RuleBasedBreakIterator *result = NULL; |
|
63 |
|
64 if (U_FAILURE(status)) |
|
65 return NULL; |
|
66 |
|
67 ures_initStackObject(brkRules); |
|
68 ures_initStackObject(brkName); |
|
69 |
|
70 // Get the locale |
|
71 UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status); |
|
72 /* this is a hack for now. Should be fixed when the data is fetched from |
|
73 brk_index.txt */ |
|
74 if(status==U_USING_DEFAULT_WARNING){ |
|
75 status=U_ZERO_ERROR; |
|
76 ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status); |
|
77 } |
|
78 |
|
79 // Get the "boundaries" array. |
|
80 if (U_SUCCESS(status)) { |
|
81 brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); |
|
82 // Get the string object naming the rules file |
|
83 brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); |
|
84 // Get the actual string |
|
85 brkfname = ures_getString(brkName, &size, &status); |
|
86 U_ASSERT((size_t)size<sizeof(fnbuff)); |
|
87 if ((size_t)size>=sizeof(fnbuff)) { |
|
88 size=0; |
|
89 if (U_SUCCESS(status)) { |
|
90 status = U_BUFFER_OVERFLOW_ERROR; |
|
91 } |
|
92 } |
|
93 |
|
94 // Use the string if we found it |
|
95 if (U_SUCCESS(status) && brkfname) { |
|
96 uprv_strncpy(actualLocale, |
|
97 ures_getLocaleInternal(brkName, &status), |
|
98 sizeof(actualLocale)/sizeof(actualLocale[0])); |
|
99 |
|
100 UChar* extStart=u_strchr(brkfname, 0x002e); |
|
101 int len = 0; |
|
102 if(extStart!=NULL){ |
|
103 len = (int)(extStart-brkfname); |
|
104 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff |
|
105 u_UCharsToChars(brkfname, fnbuff, len); |
|
106 } |
|
107 fnbuff[len]=0; // nul terminate |
|
108 } |
|
109 } |
|
110 |
|
111 ures_close(brkRules); |
|
112 ures_close(brkName); |
|
113 |
|
114 UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); |
|
115 if (U_FAILURE(status)) { |
|
116 ures_close(b); |
|
117 return NULL; |
|
118 } |
|
119 |
|
120 // Create a RuleBasedBreakIterator |
|
121 result = new RuleBasedBreakIterator(file, status); |
|
122 |
|
123 // If there is a result, set the valid locale and actual locale, and the kind |
|
124 if (U_SUCCESS(status) && result != NULL) { |
|
125 U_LOCALE_BASED(locBased, *(BreakIterator*)result); |
|
126 locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale); |
|
127 result->setBreakType(kind); |
|
128 } |
|
129 |
|
130 ures_close(b); |
|
131 |
|
132 if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple |
|
133 delete result; |
|
134 return NULL; |
|
135 } |
|
136 |
|
137 if (result == NULL) { |
|
138 udata_close(file); |
|
139 if (U_SUCCESS(status)) { |
|
140 status = U_MEMORY_ALLOCATION_ERROR; |
|
141 } |
|
142 } |
|
143 |
|
144 return result; |
|
145 } |
|
146 |
|
147 // Creates a break iterator for word breaks. |
|
148 BreakIterator* U_EXPORT2 |
|
149 BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) |
|
150 { |
|
151 return createInstance(key, UBRK_WORD, status); |
|
152 } |
|
153 |
|
154 // ------------------------------------- |
|
155 |
|
156 // Creates a break iterator for line breaks. |
|
157 BreakIterator* U_EXPORT2 |
|
158 BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) |
|
159 { |
|
160 return createInstance(key, UBRK_LINE, status); |
|
161 } |
|
162 |
|
163 // ------------------------------------- |
|
164 |
|
165 // Creates a break iterator for character breaks. |
|
166 BreakIterator* U_EXPORT2 |
|
167 BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) |
|
168 { |
|
169 return createInstance(key, UBRK_CHARACTER, status); |
|
170 } |
|
171 |
|
172 // ------------------------------------- |
|
173 |
|
174 // Creates a break iterator for sentence breaks. |
|
175 BreakIterator* U_EXPORT2 |
|
176 BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) |
|
177 { |
|
178 return createInstance(key, UBRK_SENTENCE, status); |
|
179 } |
|
180 |
|
181 // ------------------------------------- |
|
182 |
|
183 // Creates a break iterator for title casing breaks. |
|
184 BreakIterator* U_EXPORT2 |
|
185 BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) |
|
186 { |
|
187 return createInstance(key, UBRK_TITLE, status); |
|
188 } |
|
189 |
|
190 // ------------------------------------- |
|
191 |
|
192 // Gets all the available locales that has localized text boundary data. |
|
193 const Locale* U_EXPORT2 |
|
194 BreakIterator::getAvailableLocales(int32_t& count) |
|
195 { |
|
196 return Locale::getAvailableLocales(count); |
|
197 } |
|
198 |
|
199 // ------------------------------------------ |
|
200 // |
|
201 // Default constructor and destructor |
|
202 // |
|
203 //------------------------------------------- |
|
204 |
|
205 BreakIterator::BreakIterator() |
|
206 { |
|
207 *validLocale = *actualLocale = 0; |
|
208 } |
|
209 |
|
210 BreakIterator::~BreakIterator() |
|
211 { |
|
212 } |
|
213 |
|
214 // ------------------------------------------ |
|
215 // |
|
216 // Registration |
|
217 // |
|
218 //------------------------------------------- |
|
219 #if !UCONFIG_NO_SERVICE |
|
220 |
|
221 // ------------------------------------- |
|
222 |
|
223 class ICUBreakIteratorFactory : public ICUResourceBundleFactory { |
|
224 public: |
|
225 virtual ~ICUBreakIteratorFactory(); |
|
226 protected: |
|
227 virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { |
|
228 return BreakIterator::makeInstance(loc, kind, status); |
|
229 } |
|
230 }; |
|
231 |
|
232 ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} |
|
233 |
|
234 // ------------------------------------- |
|
235 |
|
236 class ICUBreakIteratorService : public ICULocaleService { |
|
237 public: |
|
238 ICUBreakIteratorService() |
|
239 : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) |
|
240 { |
|
241 UErrorCode status = U_ZERO_ERROR; |
|
242 registerFactory(new ICUBreakIteratorFactory(), status); |
|
243 } |
|
244 |
|
245 virtual ~ICUBreakIteratorService(); |
|
246 |
|
247 virtual UObject* cloneInstance(UObject* instance) const { |
|
248 return ((BreakIterator*)instance)->clone(); |
|
249 } |
|
250 |
|
251 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { |
|
252 LocaleKey& lkey = (LocaleKey&)key; |
|
253 int32_t kind = lkey.kind(); |
|
254 Locale loc; |
|
255 lkey.currentLocale(loc); |
|
256 return BreakIterator::makeInstance(loc, kind, status); |
|
257 } |
|
258 |
|
259 virtual UBool isDefault() const { |
|
260 return countFactories() == 1; |
|
261 } |
|
262 }; |
|
263 |
|
264 ICUBreakIteratorService::~ICUBreakIteratorService() {} |
|
265 |
|
266 // ------------------------------------- |
|
267 |
|
268 // defined in ucln_cmn.h |
|
269 U_NAMESPACE_END |
|
270 |
|
271 static icu::UInitOnce gInitOnce; |
|
272 static icu::ICULocaleService* gService = NULL; |
|
273 |
|
274 |
|
275 |
|
276 /** |
|
277 * Release all static memory held by breakiterator. |
|
278 */ |
|
279 U_CDECL_BEGIN |
|
280 static UBool U_CALLCONV breakiterator_cleanup(void) { |
|
281 #if !UCONFIG_NO_SERVICE |
|
282 if (gService) { |
|
283 delete gService; |
|
284 gService = NULL; |
|
285 } |
|
286 gInitOnce.reset(); |
|
287 #endif |
|
288 return TRUE; |
|
289 } |
|
290 U_CDECL_END |
|
291 U_NAMESPACE_BEGIN |
|
292 |
|
293 static void U_CALLCONV |
|
294 initService(void) { |
|
295 gService = new ICUBreakIteratorService(); |
|
296 ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); |
|
297 } |
|
298 |
|
299 static ICULocaleService* |
|
300 getService(void) |
|
301 { |
|
302 umtx_initOnce(gInitOnce, &initService); |
|
303 return gService; |
|
304 } |
|
305 |
|
306 |
|
307 // ------------------------------------- |
|
308 |
|
309 static inline UBool |
|
310 hasService(void) |
|
311 { |
|
312 return !gInitOnce.isReset() && getService() != NULL; |
|
313 } |
|
314 |
|
315 // ------------------------------------- |
|
316 |
|
317 URegistryKey U_EXPORT2 |
|
318 BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) |
|
319 { |
|
320 ICULocaleService *service = getService(); |
|
321 if (service == NULL) { |
|
322 status = U_MEMORY_ALLOCATION_ERROR; |
|
323 return NULL; |
|
324 } |
|
325 return service->registerInstance(toAdopt, locale, kind, status); |
|
326 } |
|
327 |
|
328 // ------------------------------------- |
|
329 |
|
330 UBool U_EXPORT2 |
|
331 BreakIterator::unregister(URegistryKey key, UErrorCode& status) |
|
332 { |
|
333 if (U_SUCCESS(status)) { |
|
334 if (hasService()) { |
|
335 return gService->unregister(key, status); |
|
336 } |
|
337 status = U_MEMORY_ALLOCATION_ERROR; |
|
338 } |
|
339 return FALSE; |
|
340 } |
|
341 |
|
342 // ------------------------------------- |
|
343 |
|
344 StringEnumeration* U_EXPORT2 |
|
345 BreakIterator::getAvailableLocales(void) |
|
346 { |
|
347 ICULocaleService *service = getService(); |
|
348 if (service == NULL) { |
|
349 return NULL; |
|
350 } |
|
351 return service->getAvailableLocales(); |
|
352 } |
|
353 #endif /* UCONFIG_NO_SERVICE */ |
|
354 |
|
355 // ------------------------------------- |
|
356 |
|
357 BreakIterator* |
|
358 BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) |
|
359 { |
|
360 if (U_FAILURE(status)) { |
|
361 return NULL; |
|
362 } |
|
363 |
|
364 #if !UCONFIG_NO_SERVICE |
|
365 if (hasService()) { |
|
366 Locale actualLoc(""); |
|
367 BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); |
|
368 // TODO: The way the service code works in ICU 2.8 is that if |
|
369 // there is a real registered break iterator, the actualLoc |
|
370 // will be populated, but if the handleDefault path is taken |
|
371 // (because nothing is registered that can handle the |
|
372 // requested locale) then the actualLoc comes back empty. In |
|
373 // that case, the returned object already has its actual/valid |
|
374 // locale data populated (by makeInstance, which is what |
|
375 // handleDefault calls), so we don't touch it. YES, A COMMENT |
|
376 // THIS LONG is a sign of bad code -- so the action item is to |
|
377 // revisit this in ICU 3.0 and clean it up/fix it/remove it. |
|
378 if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { |
|
379 U_LOCALE_BASED(locBased, *result); |
|
380 locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); |
|
381 } |
|
382 return result; |
|
383 } |
|
384 else |
|
385 #endif |
|
386 { |
|
387 return makeInstance(loc, kind, status); |
|
388 } |
|
389 } |
|
390 |
|
391 // ------------------------------------- |
|
392 |
|
393 BreakIterator* |
|
394 BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) |
|
395 { |
|
396 |
|
397 if (U_FAILURE(status)) { |
|
398 return NULL; |
|
399 } |
|
400 |
|
401 BreakIterator *result = NULL; |
|
402 switch (kind) { |
|
403 case UBRK_CHARACTER: |
|
404 result = BreakIterator::buildInstance(loc, "grapheme", kind, status); |
|
405 break; |
|
406 case UBRK_WORD: |
|
407 result = BreakIterator::buildInstance(loc, "word", kind, status); |
|
408 break; |
|
409 case UBRK_LINE: |
|
410 result = BreakIterator::buildInstance(loc, "line", kind, status); |
|
411 break; |
|
412 case UBRK_SENTENCE: |
|
413 result = BreakIterator::buildInstance(loc, "sentence", kind, status); |
|
414 break; |
|
415 case UBRK_TITLE: |
|
416 result = BreakIterator::buildInstance(loc, "title", kind, status); |
|
417 break; |
|
418 default: |
|
419 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
420 } |
|
421 |
|
422 if (U_FAILURE(status)) { |
|
423 return NULL; |
|
424 } |
|
425 |
|
426 return result; |
|
427 } |
|
428 |
|
429 Locale |
|
430 BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { |
|
431 U_LOCALE_BASED(locBased, *this); |
|
432 return locBased.getLocale(type, status); |
|
433 } |
|
434 |
|
435 const char * |
|
436 BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { |
|
437 U_LOCALE_BASED(locBased, *this); |
|
438 return locBased.getLocaleID(type, status); |
|
439 } |
|
440 |
|
441 |
|
442 // This implementation of getRuleStatus is a do-nothing stub, here to |
|
443 // provide a default implementation for any derived BreakIterator classes that |
|
444 // do not implement it themselves. |
|
445 int32_t BreakIterator::getRuleStatus() const { |
|
446 return 0; |
|
447 } |
|
448 |
|
449 // This implementation of getRuleStatusVec is a do-nothing stub, here to |
|
450 // provide a default implementation for any derived BreakIterator classes that |
|
451 // do not implement it themselves. |
|
452 int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { |
|
453 if (U_FAILURE(status)) { |
|
454 return 0; |
|
455 } |
|
456 if (capacity < 1) { |
|
457 status = U_BUFFER_OVERFLOW_ERROR; |
|
458 return 1; |
|
459 } |
|
460 *fillInVec = 0; |
|
461 return 1; |
|
462 } |
|
463 |
|
464 U_NAMESPACE_END |
|
465 |
|
466 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
|
467 |
|
468 //eof |