|
1 /* |
|
2 ******************************************************************************** |
|
3 * Copyright (C) 1996-2013, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ******************************************************************************** |
|
6 */ |
|
7 |
|
8 #include "unicode/utypes.h" |
|
9 |
|
10 #if !UCONFIG_NO_BREAK_ITERATION |
|
11 |
|
12 #include "unicode/ubrk.h" |
|
13 |
|
14 #include "unicode/brkiter.h" |
|
15 #include "unicode/uloc.h" |
|
16 #include "unicode/ustring.h" |
|
17 #include "unicode/uchriter.h" |
|
18 #include "unicode/rbbi.h" |
|
19 #include "rbbirb.h" |
|
20 #include "uassert.h" |
|
21 |
|
22 U_NAMESPACE_USE |
|
23 |
|
24 //------------------------------------------------------------------------------ |
|
25 // |
|
26 // ubrk_open Create a canned type of break iterator based on type (word, line, etc.) |
|
27 // and locale. |
|
28 // |
|
29 //------------------------------------------------------------------------------ |
|
30 U_CAPI UBreakIterator* U_EXPORT2 |
|
31 ubrk_open(UBreakIteratorType type, |
|
32 const char *locale, |
|
33 const UChar *text, |
|
34 int32_t textLength, |
|
35 UErrorCode *status) |
|
36 { |
|
37 |
|
38 if(U_FAILURE(*status)) return 0; |
|
39 |
|
40 BreakIterator *result = 0; |
|
41 |
|
42 switch(type) { |
|
43 |
|
44 case UBRK_CHARACTER: |
|
45 result = BreakIterator::createCharacterInstance(Locale(locale), *status); |
|
46 break; |
|
47 |
|
48 case UBRK_WORD: |
|
49 result = BreakIterator::createWordInstance(Locale(locale), *status); |
|
50 break; |
|
51 |
|
52 case UBRK_LINE: |
|
53 result = BreakIterator::createLineInstance(Locale(locale), *status); |
|
54 break; |
|
55 |
|
56 case UBRK_SENTENCE: |
|
57 result = BreakIterator::createSentenceInstance(Locale(locale), *status); |
|
58 break; |
|
59 |
|
60 case UBRK_TITLE: |
|
61 result = BreakIterator::createTitleInstance(Locale(locale), *status); |
|
62 break; |
|
63 |
|
64 default: |
|
65 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
66 } |
|
67 |
|
68 // check for allocation error |
|
69 if (U_FAILURE(*status)) { |
|
70 return 0; |
|
71 } |
|
72 if(result == 0) { |
|
73 *status = U_MEMORY_ALLOCATION_ERROR; |
|
74 return 0; |
|
75 } |
|
76 |
|
77 |
|
78 UBreakIterator *uBI = (UBreakIterator *)result; |
|
79 if (text != NULL) { |
|
80 ubrk_setText(uBI, text, textLength, status); |
|
81 } |
|
82 return uBI; |
|
83 } |
|
84 |
|
85 |
|
86 |
|
87 //------------------------------------------------------------------------------ |
|
88 // |
|
89 // ubrk_openRules open a break iterator from a set of break rules. |
|
90 // Invokes the rule builder. |
|
91 // |
|
92 //------------------------------------------------------------------------------ |
|
93 U_CAPI UBreakIterator* U_EXPORT2 |
|
94 ubrk_openRules( const UChar *rules, |
|
95 int32_t rulesLength, |
|
96 const UChar *text, |
|
97 int32_t textLength, |
|
98 UParseError *parseErr, |
|
99 UErrorCode *status) { |
|
100 |
|
101 if (status == NULL || U_FAILURE(*status)){ |
|
102 return 0; |
|
103 } |
|
104 |
|
105 BreakIterator *result = 0; |
|
106 UnicodeString ruleString(rules, rulesLength); |
|
107 result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status); |
|
108 if(U_FAILURE(*status)) { |
|
109 return 0; |
|
110 } |
|
111 |
|
112 UBreakIterator *uBI = (UBreakIterator *)result; |
|
113 if (text != NULL) { |
|
114 ubrk_setText(uBI, text, textLength, status); |
|
115 } |
|
116 return uBI; |
|
117 } |
|
118 |
|
119 |
|
120 |
|
121 |
|
122 |
|
123 U_CAPI UBreakIterator * U_EXPORT2 |
|
124 ubrk_safeClone( |
|
125 const UBreakIterator *bi, |
|
126 void * /*stackBuffer*/, |
|
127 int32_t *pBufferSize, |
|
128 UErrorCode *status) |
|
129 { |
|
130 if (status == NULL || U_FAILURE(*status)){ |
|
131 return NULL; |
|
132 } |
|
133 if (bi == NULL) { |
|
134 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
135 return NULL; |
|
136 } |
|
137 if (pBufferSize != NULL) { |
|
138 int32_t inputSize = *pBufferSize; |
|
139 *pBufferSize = 1; |
|
140 if (inputSize == 0) { |
|
141 return NULL; // preflighting for deprecated functionality |
|
142 } |
|
143 } |
|
144 BreakIterator *newBI = ((BreakIterator *)bi)->clone(); |
|
145 if (newBI == NULL) { |
|
146 *status = U_MEMORY_ALLOCATION_ERROR; |
|
147 } else { |
|
148 *status = U_SAFECLONE_ALLOCATED_WARNING; |
|
149 } |
|
150 return (UBreakIterator *)newBI; |
|
151 } |
|
152 |
|
153 |
|
154 |
|
155 U_CAPI void U_EXPORT2 |
|
156 ubrk_close(UBreakIterator *bi) |
|
157 { |
|
158 delete (BreakIterator *)bi; |
|
159 } |
|
160 |
|
161 U_CAPI void U_EXPORT2 |
|
162 ubrk_setText(UBreakIterator* bi, |
|
163 const UChar* text, |
|
164 int32_t textLength, |
|
165 UErrorCode* status) |
|
166 { |
|
167 BreakIterator *brit = (BreakIterator *)bi; |
|
168 UText ut = UTEXT_INITIALIZER; |
|
169 utext_openUChars(&ut, text, textLength, status); |
|
170 brit->setText(&ut, *status); |
|
171 // A stack allocated UText wrapping a UChar * string |
|
172 // can be dumped without explicitly closing it. |
|
173 } |
|
174 |
|
175 |
|
176 |
|
177 U_CAPI void U_EXPORT2 |
|
178 ubrk_setUText(UBreakIterator *bi, |
|
179 UText *text, |
|
180 UErrorCode *status) |
|
181 { |
|
182 RuleBasedBreakIterator *brit = (RuleBasedBreakIterator *)bi; |
|
183 brit->RuleBasedBreakIterator::setText(text, *status); |
|
184 } |
|
185 |
|
186 |
|
187 |
|
188 |
|
189 |
|
190 U_CAPI int32_t U_EXPORT2 |
|
191 ubrk_current(const UBreakIterator *bi) |
|
192 { |
|
193 |
|
194 return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::current(); |
|
195 } |
|
196 |
|
197 U_CAPI int32_t U_EXPORT2 |
|
198 ubrk_next(UBreakIterator *bi) |
|
199 { |
|
200 |
|
201 return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::next(); |
|
202 } |
|
203 |
|
204 U_CAPI int32_t U_EXPORT2 |
|
205 ubrk_previous(UBreakIterator *bi) |
|
206 { |
|
207 |
|
208 return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::previous(); |
|
209 } |
|
210 |
|
211 U_CAPI int32_t U_EXPORT2 |
|
212 ubrk_first(UBreakIterator *bi) |
|
213 { |
|
214 |
|
215 return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::first(); |
|
216 } |
|
217 |
|
218 U_CAPI int32_t U_EXPORT2 |
|
219 ubrk_last(UBreakIterator *bi) |
|
220 { |
|
221 |
|
222 return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::last(); |
|
223 } |
|
224 |
|
225 U_CAPI int32_t U_EXPORT2 |
|
226 ubrk_preceding(UBreakIterator *bi, |
|
227 int32_t offset) |
|
228 { |
|
229 |
|
230 return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::preceding(offset); |
|
231 } |
|
232 |
|
233 U_CAPI int32_t U_EXPORT2 |
|
234 ubrk_following(UBreakIterator *bi, |
|
235 int32_t offset) |
|
236 { |
|
237 |
|
238 return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::following(offset); |
|
239 } |
|
240 |
|
241 U_CAPI const char* U_EXPORT2 |
|
242 ubrk_getAvailable(int32_t index) |
|
243 { |
|
244 |
|
245 return uloc_getAvailable(index); |
|
246 } |
|
247 |
|
248 U_CAPI int32_t U_EXPORT2 |
|
249 ubrk_countAvailable() |
|
250 { |
|
251 |
|
252 return uloc_countAvailable(); |
|
253 } |
|
254 |
|
255 |
|
256 U_CAPI UBool U_EXPORT2 |
|
257 ubrk_isBoundary(UBreakIterator *bi, int32_t offset) |
|
258 { |
|
259 return ((RuleBasedBreakIterator *)bi)->RuleBasedBreakIterator::isBoundary(offset); |
|
260 } |
|
261 |
|
262 |
|
263 U_CAPI int32_t U_EXPORT2 |
|
264 ubrk_getRuleStatus(UBreakIterator *bi) |
|
265 { |
|
266 return ((RuleBasedBreakIterator *)bi)->RuleBasedBreakIterator::getRuleStatus(); |
|
267 } |
|
268 |
|
269 U_CAPI int32_t U_EXPORT2 |
|
270 ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status) |
|
271 { |
|
272 return ((RuleBasedBreakIterator *)bi)->RuleBasedBreakIterator::getRuleStatusVec(fillInVec, capacity, *status); |
|
273 } |
|
274 |
|
275 |
|
276 U_CAPI const char* U_EXPORT2 |
|
277 ubrk_getLocaleByType(const UBreakIterator *bi, |
|
278 ULocDataLocaleType type, |
|
279 UErrorCode* status) |
|
280 { |
|
281 if (bi == NULL) { |
|
282 if (U_SUCCESS(*status)) { |
|
283 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
284 } |
|
285 return NULL; |
|
286 } |
|
287 return ((BreakIterator*)bi)->getLocaleID(type, *status); |
|
288 } |
|
289 |
|
290 |
|
291 void ubrk_refreshUText(UBreakIterator *bi, |
|
292 UText *text, |
|
293 UErrorCode *status) |
|
294 { |
|
295 BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi); |
|
296 bii->refreshInputText(text, *status); |
|
297 } |
|
298 |
|
299 |
|
300 |
|
301 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |