|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (c) 2002-2011, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * Author: Alan Liu |
|
7 * Created: October 30 2002 |
|
8 * Since: ICU 2.4 |
|
9 * 2010nov19 Markus Scherer Rewrite for formatVersion 2. |
|
10 ********************************************************************** |
|
11 */ |
|
12 #include "propname.h" |
|
13 #include "unicode/uchar.h" |
|
14 #include "unicode/udata.h" |
|
15 #include "unicode/uscript.h" |
|
16 #include "umutex.h" |
|
17 #include "cmemory.h" |
|
18 #include "cstring.h" |
|
19 #include "ucln_cmn.h" |
|
20 #include "uarrsort.h" |
|
21 #include "uinvchar.h" |
|
22 |
|
23 #define INCLUDED_FROM_PROPNAME_CPP |
|
24 #include "propname_data.h" |
|
25 |
|
26 U_CDECL_BEGIN |
|
27 |
|
28 /** |
|
29 * Get the next non-ignorable ASCII character from a property name |
|
30 * and lowercases it. |
|
31 * @return ((advance count for the name)<<8)|character |
|
32 */ |
|
33 static inline int32_t |
|
34 getASCIIPropertyNameChar(const char *name) { |
|
35 int32_t i; |
|
36 char c; |
|
37 |
|
38 /* Ignore delimiters '-', '_', and ASCII White_Space */ |
|
39 for(i=0; |
|
40 (c=name[i++])==0x2d || c==0x5f || |
|
41 c==0x20 || (0x09<=c && c<=0x0d); |
|
42 ) {} |
|
43 |
|
44 if(c!=0) { |
|
45 return (i<<8)|(uint8_t)uprv_asciitolower((char)c); |
|
46 } else { |
|
47 return i<<8; |
|
48 } |
|
49 } |
|
50 |
|
51 /** |
|
52 * Get the next non-ignorable EBCDIC character from a property name |
|
53 * and lowercases it. |
|
54 * @return ((advance count for the name)<<8)|character |
|
55 */ |
|
56 static inline int32_t |
|
57 getEBCDICPropertyNameChar(const char *name) { |
|
58 int32_t i; |
|
59 char c; |
|
60 |
|
61 /* Ignore delimiters '-', '_', and EBCDIC White_Space */ |
|
62 for(i=0; |
|
63 (c=name[i++])==0x60 || c==0x6d || |
|
64 c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; |
|
65 ) {} |
|
66 |
|
67 if(c!=0) { |
|
68 return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); |
|
69 } else { |
|
70 return i<<8; |
|
71 } |
|
72 } |
|
73 |
|
74 /** |
|
75 * Unicode property names and property value names are compared "loosely". |
|
76 * |
|
77 * UCD.html 4.0.1 says: |
|
78 * For all property names, property value names, and for property values for |
|
79 * Enumerated, Binary, or Catalog properties, use the following |
|
80 * loose matching rule: |
|
81 * |
|
82 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. |
|
83 * |
|
84 * This function does just that, for (char *) name strings. |
|
85 * It is almost identical to ucnv_compareNames() but also ignores |
|
86 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). |
|
87 * |
|
88 * @internal |
|
89 */ |
|
90 |
|
91 U_CAPI int32_t U_EXPORT2 |
|
92 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { |
|
93 int32_t rc, r1, r2; |
|
94 |
|
95 for(;;) { |
|
96 r1=getASCIIPropertyNameChar(name1); |
|
97 r2=getASCIIPropertyNameChar(name2); |
|
98 |
|
99 /* If we reach the ends of both strings then they match */ |
|
100 if(((r1|r2)&0xff)==0) { |
|
101 return 0; |
|
102 } |
|
103 |
|
104 /* Compare the lowercased characters */ |
|
105 if(r1!=r2) { |
|
106 rc=(r1&0xff)-(r2&0xff); |
|
107 if(rc!=0) { |
|
108 return rc; |
|
109 } |
|
110 } |
|
111 |
|
112 name1+=r1>>8; |
|
113 name2+=r2>>8; |
|
114 } |
|
115 } |
|
116 |
|
117 U_CAPI int32_t U_EXPORT2 |
|
118 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { |
|
119 int32_t rc, r1, r2; |
|
120 |
|
121 for(;;) { |
|
122 r1=getEBCDICPropertyNameChar(name1); |
|
123 r2=getEBCDICPropertyNameChar(name2); |
|
124 |
|
125 /* If we reach the ends of both strings then they match */ |
|
126 if(((r1|r2)&0xff)==0) { |
|
127 return 0; |
|
128 } |
|
129 |
|
130 /* Compare the lowercased characters */ |
|
131 if(r1!=r2) { |
|
132 rc=(r1&0xff)-(r2&0xff); |
|
133 if(rc!=0) { |
|
134 return rc; |
|
135 } |
|
136 } |
|
137 |
|
138 name1+=r1>>8; |
|
139 name2+=r2>>8; |
|
140 } |
|
141 } |
|
142 |
|
143 U_CDECL_END |
|
144 |
|
145 U_NAMESPACE_BEGIN |
|
146 |
|
147 int32_t PropNameData::findProperty(int32_t property) { |
|
148 int32_t i=1; // valueMaps index, initially after numRanges |
|
149 for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) { |
|
150 // Read and skip the start and limit of this range. |
|
151 int32_t start=valueMaps[i]; |
|
152 int32_t limit=valueMaps[i+1]; |
|
153 i+=2; |
|
154 if(property<start) { |
|
155 break; |
|
156 } |
|
157 if(property<limit) { |
|
158 return i+(property-start)*2; |
|
159 } |
|
160 i+=(limit-start)*2; // Skip all entries for this range. |
|
161 } |
|
162 return 0; |
|
163 } |
|
164 |
|
165 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) { |
|
166 if(valueMapIndex==0) { |
|
167 return 0; // The property does not have named values. |
|
168 } |
|
169 ++valueMapIndex; // Skip the BytesTrie offset. |
|
170 int32_t numRanges=valueMaps[valueMapIndex++]; |
|
171 if(numRanges<0x10) { |
|
172 // Ranges of values. |
|
173 for(; numRanges>0; --numRanges) { |
|
174 // Read and skip the start and limit of this range. |
|
175 int32_t start=valueMaps[valueMapIndex]; |
|
176 int32_t limit=valueMaps[valueMapIndex+1]; |
|
177 valueMapIndex+=2; |
|
178 if(value<start) { |
|
179 break; |
|
180 } |
|
181 if(value<limit) { |
|
182 return valueMaps[valueMapIndex+value-start]; |
|
183 } |
|
184 valueMapIndex+=limit-start; // Skip all entries for this range. |
|
185 } |
|
186 } else { |
|
187 // List of values. |
|
188 int32_t valuesStart=valueMapIndex; |
|
189 int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10; |
|
190 do { |
|
191 int32_t v=valueMaps[valueMapIndex]; |
|
192 if(value<v) { |
|
193 break; |
|
194 } |
|
195 if(value==v) { |
|
196 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart]; |
|
197 } |
|
198 } while(++valueMapIndex<nameGroupOffsetsStart); |
|
199 } |
|
200 return 0; |
|
201 } |
|
202 |
|
203 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) { |
|
204 int32_t numNames=*nameGroup++; |
|
205 if(nameIndex<0 || numNames<=nameIndex) { |
|
206 return NULL; |
|
207 } |
|
208 // Skip nameIndex names. |
|
209 for(; nameIndex>0; --nameIndex) { |
|
210 nameGroup=uprv_strchr(nameGroup, 0)+1; |
|
211 } |
|
212 if(*nameGroup==0) { |
|
213 return NULL; // no name (Property[Value]Aliases.txt has "n/a") |
|
214 } |
|
215 return nameGroup; |
|
216 } |
|
217 |
|
218 UBool PropNameData::containsName(BytesTrie &trie, const char *name) { |
|
219 if(name==NULL) { |
|
220 return FALSE; |
|
221 } |
|
222 UStringTrieResult result=USTRINGTRIE_NO_VALUE; |
|
223 char c; |
|
224 while((c=*name++)!=0) { |
|
225 c=uprv_invCharToLowercaseAscii(c); |
|
226 // Ignore delimiters '-', '_', and ASCII White_Space. |
|
227 if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) { |
|
228 continue; |
|
229 } |
|
230 if(!USTRINGTRIE_HAS_NEXT(result)) { |
|
231 return FALSE; |
|
232 } |
|
233 result=trie.next((uint8_t)c); |
|
234 } |
|
235 return USTRINGTRIE_HAS_VALUE(result); |
|
236 } |
|
237 |
|
238 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) { |
|
239 int32_t valueMapIndex=findProperty(property); |
|
240 if(valueMapIndex==0) { |
|
241 return NULL; // Not a known property. |
|
242 } |
|
243 return getName(nameGroups+valueMaps[valueMapIndex], nameChoice); |
|
244 } |
|
245 |
|
246 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) { |
|
247 int32_t valueMapIndex=findProperty(property); |
|
248 if(valueMapIndex==0) { |
|
249 return NULL; // Not a known property. |
|
250 } |
|
251 int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value); |
|
252 if(nameGroupOffset==0) { |
|
253 return NULL; |
|
254 } |
|
255 return getName(nameGroups+nameGroupOffset, nameChoice); |
|
256 } |
|
257 |
|
258 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) { |
|
259 BytesTrie trie(bytesTries+bytesTrieOffset); |
|
260 if(containsName(trie, alias)) { |
|
261 return trie.getValue(); |
|
262 } else { |
|
263 return UCHAR_INVALID_CODE; |
|
264 } |
|
265 } |
|
266 |
|
267 int32_t PropNameData::getPropertyEnum(const char *alias) { |
|
268 return getPropertyOrValueEnum(0, alias); |
|
269 } |
|
270 |
|
271 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) { |
|
272 int32_t valueMapIndex=findProperty(property); |
|
273 if(valueMapIndex==0) { |
|
274 return UCHAR_INVALID_CODE; // Not a known property. |
|
275 } |
|
276 valueMapIndex=valueMaps[valueMapIndex+1]; |
|
277 if(valueMapIndex==0) { |
|
278 return UCHAR_INVALID_CODE; // The property does not have named values. |
|
279 } |
|
280 // valueMapIndex is the start of the property's valueMap, |
|
281 // where the first word is the BytesTrie offset. |
|
282 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); |
|
283 } |
|
284 U_NAMESPACE_END |
|
285 |
|
286 //---------------------------------------------------------------------- |
|
287 // Public API implementation |
|
288 |
|
289 U_CAPI const char* U_EXPORT2 |
|
290 u_getPropertyName(UProperty property, |
|
291 UPropertyNameChoice nameChoice) { |
|
292 U_NAMESPACE_USE |
|
293 return PropNameData::getPropertyName(property, nameChoice); |
|
294 } |
|
295 |
|
296 U_CAPI UProperty U_EXPORT2 |
|
297 u_getPropertyEnum(const char* alias) { |
|
298 U_NAMESPACE_USE |
|
299 return (UProperty)PropNameData::getPropertyEnum(alias); |
|
300 } |
|
301 |
|
302 U_CAPI const char* U_EXPORT2 |
|
303 u_getPropertyValueName(UProperty property, |
|
304 int32_t value, |
|
305 UPropertyNameChoice nameChoice) { |
|
306 U_NAMESPACE_USE |
|
307 return PropNameData::getPropertyValueName(property, value, nameChoice); |
|
308 } |
|
309 |
|
310 U_CAPI int32_t U_EXPORT2 |
|
311 u_getPropertyValueEnum(UProperty property, |
|
312 const char* alias) { |
|
313 U_NAMESPACE_USE |
|
314 return PropNameData::getPropertyValueEnum(property, alias); |
|
315 } |
|
316 |
|
317 U_CAPI const char* U_EXPORT2 |
|
318 uscript_getName(UScriptCode scriptCode){ |
|
319 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, |
|
320 U_LONG_PROPERTY_NAME); |
|
321 } |
|
322 |
|
323 U_CAPI const char* U_EXPORT2 |
|
324 uscript_getShortName(UScriptCode scriptCode){ |
|
325 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, |
|
326 U_SHORT_PROPERTY_NAME); |
|
327 } |