|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 2003-2007, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ******************************************************************************* |
|
6 * |
|
7 * File prscmnts.cpp |
|
8 * |
|
9 * Modification History: |
|
10 * |
|
11 * Date Name Description |
|
12 * 08/22/2003 ram Creation. |
|
13 ******************************************************************************* |
|
14 */ |
|
15 |
|
16 #include "unicode/regex.h" |
|
17 #include "unicode/unistr.h" |
|
18 #include "unicode/parseerr.h" |
|
19 #include "prscmnts.h" |
|
20 #include <stdio.h> |
|
21 #include <stdlib.h> |
|
22 |
|
23 U_NAMESPACE_USE |
|
24 |
|
25 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ |
|
26 |
|
27 #define MAX_SPLIT_STRINGS 20 |
|
28 |
|
29 const char *patternStrings[UPC_LIMIT]={ |
|
30 "^translate\\s*(.*)", |
|
31 "^note\\s*(.*)" |
|
32 }; |
|
33 |
|
34 U_CFUNC int32_t |
|
35 removeText(UChar *source, int32_t srcLen, |
|
36 UnicodeString patString,uint32_t options, |
|
37 UnicodeString replaceText, UErrorCode *status){ |
|
38 |
|
39 if(status == NULL || U_FAILURE(*status)){ |
|
40 return 0; |
|
41 } |
|
42 |
|
43 UnicodeString src(source, srcLen); |
|
44 |
|
45 RegexMatcher myMatcher(patString, src, options, *status); |
|
46 if(U_FAILURE(*status)){ |
|
47 return 0; |
|
48 } |
|
49 UnicodeString dest; |
|
50 |
|
51 |
|
52 dest = myMatcher.replaceAll(replaceText,*status); |
|
53 |
|
54 |
|
55 return dest.extract(source, srcLen, *status); |
|
56 |
|
57 } |
|
58 U_CFUNC int32_t |
|
59 trim(UChar *src, int32_t srcLen, UErrorCode *status){ |
|
60 srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines |
|
61 srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces |
|
62 srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes |
|
63 return srcLen; |
|
64 } |
|
65 |
|
66 U_CFUNC int32_t |
|
67 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ |
|
68 srcLen = trim(source, srcLen, status); |
|
69 UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line |
|
70 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status); |
|
71 return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines; |
|
72 } |
|
73 |
|
74 U_CFUNC int32_t |
|
75 getText(const UChar* source, int32_t srcLen, |
|
76 UChar** dest, int32_t destCapacity, |
|
77 UnicodeString patternString, |
|
78 UErrorCode* status){ |
|
79 |
|
80 if(status == NULL || U_FAILURE(*status)){ |
|
81 return 0; |
|
82 } |
|
83 |
|
84 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
|
85 RegexPattern *pattern = RegexPattern::compile("@", 0, *status); |
|
86 UnicodeString src (source,srcLen); |
|
87 |
|
88 if (U_FAILURE(*status)) { |
|
89 return 0; |
|
90 } |
|
91 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
|
92 |
|
93 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); |
|
94 if (U_FAILURE(*status)) { |
|
95 return 0; |
|
96 } |
|
97 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ |
|
98 matcher.reset(stringArray[i]); |
|
99 if(matcher.lookingAt(*status)){ |
|
100 UnicodeString out = matcher.group(1, *status); |
|
101 |
|
102 return out.extract(*dest, destCapacity,*status); |
|
103 } |
|
104 } |
|
105 return 0; |
|
106 } |
|
107 |
|
108 |
|
109 #define AT_SIGN 0x0040 |
|
110 |
|
111 U_CFUNC int32_t |
|
112 getDescription( const UChar* source, int32_t srcLen, |
|
113 UChar** dest, int32_t destCapacity, |
|
114 UErrorCode* status){ |
|
115 if(status == NULL || U_FAILURE(*status)){ |
|
116 return 0; |
|
117 } |
|
118 |
|
119 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
|
120 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); |
|
121 UnicodeString src(source, srcLen); |
|
122 |
|
123 if (U_FAILURE(*status)) { |
|
124 return 0; |
|
125 } |
|
126 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); |
|
127 |
|
128 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ |
|
129 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); |
|
130 return trim(*dest, destLen, status); |
|
131 } |
|
132 return 0; |
|
133 } |
|
134 |
|
135 U_CFUNC int32_t |
|
136 getCount(const UChar* source, int32_t srcLen, |
|
137 UParseCommentsOption option, UErrorCode *status){ |
|
138 |
|
139 if(status == NULL || U_FAILURE(*status)){ |
|
140 return 0; |
|
141 } |
|
142 |
|
143 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
|
144 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); |
|
145 UnicodeString src (source, srcLen); |
|
146 |
|
147 |
|
148 if (U_FAILURE(*status)) { |
|
149 return 0; |
|
150 } |
|
151 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
|
152 |
|
153 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); |
|
154 if (U_FAILURE(*status)) { |
|
155 return 0; |
|
156 } |
|
157 int32_t count = 0; |
|
158 for(int32_t i=0; i<retLen; i++){ |
|
159 matcher.reset(stringArray[i]); |
|
160 if(matcher.lookingAt(*status)){ |
|
161 count++; |
|
162 } |
|
163 } |
|
164 if(option == UPC_TRANSLATE && count > 1){ |
|
165 fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); |
|
166 exit(U_UNSUPPORTED_ERROR); |
|
167 } |
|
168 return count; |
|
169 } |
|
170 |
|
171 U_CFUNC int32_t |
|
172 getAt(const UChar* source, int32_t srcLen, |
|
173 UChar** dest, int32_t destCapacity, |
|
174 int32_t index, |
|
175 UParseCommentsOption option, |
|
176 UErrorCode* status){ |
|
177 |
|
178 if(status == NULL || U_FAILURE(*status)){ |
|
179 return 0; |
|
180 } |
|
181 |
|
182 UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
|
183 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); |
|
184 UnicodeString src (source, srcLen); |
|
185 |
|
186 |
|
187 if (U_FAILURE(*status)) { |
|
188 return 0; |
|
189 } |
|
190 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
|
191 |
|
192 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); |
|
193 if (U_FAILURE(*status)) { |
|
194 return 0; |
|
195 } |
|
196 int32_t count = 0; |
|
197 for(int32_t i=0; i<retLen; i++){ |
|
198 matcher.reset(stringArray[i]); |
|
199 if(matcher.lookingAt(*status)){ |
|
200 if(count == index){ |
|
201 UnicodeString out = matcher.group(1, *status); |
|
202 return out.extract(*dest, destCapacity,*status); |
|
203 } |
|
204 count++; |
|
205 |
|
206 } |
|
207 } |
|
208 return 0; |
|
209 |
|
210 } |
|
211 |
|
212 U_CFUNC int32_t |
|
213 getTranslate( const UChar* source, int32_t srcLen, |
|
214 UChar** dest, int32_t destCapacity, |
|
215 UErrorCode* status){ |
|
216 UnicodeString notePatternString = "^translate\\s*?(.*)"; |
|
217 |
|
218 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); |
|
219 return trim(*dest, destLen, status); |
|
220 } |
|
221 |
|
222 U_CFUNC int32_t |
|
223 getNote(const UChar* source, int32_t srcLen, |
|
224 UChar** dest, int32_t destCapacity, |
|
225 UErrorCode* status){ |
|
226 |
|
227 UnicodeString notePatternString = "^note\\s*?(.*)"; |
|
228 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); |
|
229 return trim(*dest, destLen, status); |
|
230 |
|
231 } |
|
232 |
|
233 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ |
|
234 |