|
1 // |
|
2 // Copyright (C) 2012 International Business Machines Corporation |
|
3 // and others. All rights reserved. |
|
4 // |
|
5 // file: regeximp.cpp |
|
6 // |
|
7 // ICU Regular Expressions, |
|
8 // miscellaneous implementation functions. |
|
9 // |
|
10 |
|
11 #include "unicode/utypes.h" |
|
12 |
|
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
|
14 #include "regeximp.h" |
|
15 #include "unicode/utf16.h" |
|
16 |
|
17 U_NAMESPACE_BEGIN |
|
18 |
|
19 CaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) : |
|
20 fUText(text), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { |
|
21 fcsp = ucase_getSingleton(); |
|
22 } |
|
23 |
|
24 CaseFoldingUTextIterator::~CaseFoldingUTextIterator() {} |
|
25 |
|
26 UChar32 CaseFoldingUTextIterator::next() { |
|
27 UChar32 foldedC; |
|
28 UChar32 originalC; |
|
29 if (fFoldChars == NULL) { |
|
30 // We are not in a string folding of an earlier character. |
|
31 // Start handling the next char from the input UText. |
|
32 originalC = UTEXT_NEXT32(&fUText); |
|
33 if (originalC == U_SENTINEL) { |
|
34 return originalC; |
|
35 } |
|
36 fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); |
|
37 if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { |
|
38 // input code point folds to a single code point, possibly itself. |
|
39 // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. |
|
40 if (fFoldLength < 0) { |
|
41 fFoldLength = ~fFoldLength; |
|
42 } |
|
43 foldedC = (UChar32)fFoldLength; |
|
44 fFoldChars = NULL; |
|
45 return foldedC; |
|
46 } |
|
47 // String foldings fall through here. |
|
48 fFoldIndex = 0; |
|
49 } |
|
50 |
|
51 U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC); |
|
52 if (fFoldIndex >= fFoldLength) { |
|
53 fFoldChars = NULL; |
|
54 } |
|
55 return foldedC; |
|
56 } |
|
57 |
|
58 |
|
59 UBool CaseFoldingUTextIterator::inExpansion() { |
|
60 return fFoldChars != NULL; |
|
61 } |
|
62 |
|
63 |
|
64 |
|
65 CaseFoldingUCharIterator::CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit) : |
|
66 fChars(chars), fIndex(start), fLimit(limit), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { |
|
67 fcsp = ucase_getSingleton(); |
|
68 } |
|
69 |
|
70 |
|
71 CaseFoldingUCharIterator::~CaseFoldingUCharIterator() {} |
|
72 |
|
73 |
|
74 UChar32 CaseFoldingUCharIterator::next() { |
|
75 UChar32 foldedC; |
|
76 UChar32 originalC; |
|
77 if (fFoldChars == NULL) { |
|
78 // We are not in a string folding of an earlier character. |
|
79 // Start handling the next char from the input UText. |
|
80 if (fIndex >= fLimit) { |
|
81 return U_SENTINEL; |
|
82 } |
|
83 U16_NEXT(fChars, fIndex, fLimit, originalC); |
|
84 |
|
85 fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); |
|
86 if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { |
|
87 // input code point folds to a single code point, possibly itself. |
|
88 // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. |
|
89 if (fFoldLength < 0) { |
|
90 fFoldLength = ~fFoldLength; |
|
91 } |
|
92 foldedC = (UChar32)fFoldLength; |
|
93 fFoldChars = NULL; |
|
94 return foldedC; |
|
95 } |
|
96 // String foldings fall through here. |
|
97 fFoldIndex = 0; |
|
98 } |
|
99 |
|
100 U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC); |
|
101 if (fFoldIndex >= fFoldLength) { |
|
102 fFoldChars = NULL; |
|
103 } |
|
104 return foldedC; |
|
105 } |
|
106 |
|
107 |
|
108 UBool CaseFoldingUCharIterator::inExpansion() { |
|
109 return fFoldChars != NULL; |
|
110 } |
|
111 |
|
112 int64_t CaseFoldingUCharIterator::getIndex() { |
|
113 return fIndex; |
|
114 } |
|
115 |
|
116 |
|
117 U_NAMESPACE_END |
|
118 |
|
119 #endif |
|
120 |