|
1 /* |
|
2 ****************************************************************************** |
|
3 * |
|
4 * Copyright (C) 1999-2011, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************/ |
|
8 |
|
9 |
|
10 /*------------------------------------------------------------------------------ |
|
11 * |
|
12 * UCommonData An abstract interface for dealing with ICU Common Data Files. |
|
13 * ICU Common Data Files are a grouping of a number of individual |
|
14 * data items (resources, converters, tables, anything) into a |
|
15 * single file or dll. The combined format includes a table of |
|
16 * contents for locating the individual items by name. |
|
17 * |
|
18 * Two formats for the table of contents are supported, which is |
|
19 * why there is an abstract interface involved. |
|
20 * |
|
21 */ |
|
22 |
|
23 #include "unicode/utypes.h" |
|
24 #include "unicode/udata.h" |
|
25 #include "cstring.h" |
|
26 #include "ucmndata.h" |
|
27 #include "udatamem.h" |
|
28 |
|
29 #if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP) |
|
30 # include <stdio.h> |
|
31 #endif |
|
32 |
|
33 U_CFUNC uint16_t |
|
34 udata_getHeaderSize(const DataHeader *udh) { |
|
35 if(udh==NULL) { |
|
36 return 0; |
|
37 } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) { |
|
38 /* same endianness */ |
|
39 return udh->dataHeader.headerSize; |
|
40 } else { |
|
41 /* opposite endianness */ |
|
42 uint16_t x=udh->dataHeader.headerSize; |
|
43 return (uint16_t)((x<<8)|(x>>8)); |
|
44 } |
|
45 } |
|
46 |
|
47 U_CFUNC uint16_t |
|
48 udata_getInfoSize(const UDataInfo *info) { |
|
49 if(info==NULL) { |
|
50 return 0; |
|
51 } else if(info->isBigEndian==U_IS_BIG_ENDIAN) { |
|
52 /* same endianness */ |
|
53 return info->size; |
|
54 } else { |
|
55 /* opposite endianness */ |
|
56 uint16_t x=info->size; |
|
57 return (uint16_t)((x<<8)|(x>>8)); |
|
58 } |
|
59 } |
|
60 |
|
61 /*-----------------------------------------------------------------------------* |
|
62 * * |
|
63 * Pointer TOCs. TODO: This form of table-of-contents should be removed * |
|
64 * because DLLs must be relocated on loading to correct the * |
|
65 * pointer values and this operation makes shared memory * |
|
66 * mapping of the data much less likely to work. * |
|
67 * * |
|
68 *-----------------------------------------------------------------------------*/ |
|
69 typedef struct { |
|
70 const char *entryName; |
|
71 const DataHeader *pHeader; |
|
72 } PointerTOCEntry; |
|
73 |
|
74 |
|
75 typedef struct { |
|
76 uint32_t count; |
|
77 uint32_t reserved; |
|
78 PointerTOCEntry entry[2]; /* Actual size is from count. */ |
|
79 } PointerTOC; |
|
80 |
|
81 |
|
82 /* definition of OffsetTOC struct types moved to ucmndata.h */ |
|
83 |
|
84 /*-----------------------------------------------------------------------------* |
|
85 * * |
|
86 * entry point lookup implementations * |
|
87 * * |
|
88 *-----------------------------------------------------------------------------*/ |
|
89 |
|
#if !defined(MIN)
/*
 * Smaller of two values. This is a function-like macro, so the selected
 * argument is evaluated twice; do not pass expressions with side effects.
 */
#   define MIN(a,b) (((b)>(a)) ? (a) : (b))
#endif
|
93 |
|
/**
 * Compare two NUL-terminated strings that are already known to agree in
 * their first *pPrefixLength bytes, skipping that common part.
 *
 * Bytes are compared as unsigned values. On return, *pPrefixLength has been
 * advanced by the number of additional equal (non-NUL) bytes that were found.
 *
 * @param s1             First string.
 * @param s2             Second string.
 * @param pPrefixLength  In: length of the known shared prefix.
 *                       Out: length of the full shared prefix.
 * @return 0 if the strings are equal, otherwise the difference between the
 *         first pair of differing bytes (negative if s1 sorts before s2).
 */
static int32_t
strcmpAfterPrefix(const char *s1, const char *s2, int32_t *pPrefixLength) {
    int32_t shared=*pPrefixLength;
    int32_t diff;

    for(;; ++shared) {
        const int32_t left=(uint8_t)s1[shared];
        const int32_t right=(uint8_t)s2[shared];
        diff=left-right;
        if(diff!=0 || left==0) {
            /* first mismatch, or both strings ended together */
            break;
        }
    }

    *pPrefixLength=shared;
    return diff;
}
|
116 |
|
117 static int32_t |
|
118 offsetTOCPrefixBinarySearch(const char *s, const char *names, |
|
119 const UDataOffsetTOCEntry *toc, int32_t count) { |
|
120 int32_t start=0; |
|
121 int32_t limit=count; |
|
122 /* |
|
123 * Remember the shared prefix between s, start and limit, |
|
124 * and don't compare that shared prefix again. |
|
125 * The shared prefix should get longer as we narrow the [start, limit[ range. |
|
126 */ |
|
127 int32_t startPrefixLength=0; |
|
128 int32_t limitPrefixLength=0; |
|
129 if(count==0) { |
|
130 return -1; |
|
131 } |
|
132 /* |
|
133 * Prime the prefix lengths so that we don't keep prefixLength at 0 until |
|
134 * both the start and limit indexes have moved. |
|
135 * At the same time, we find if s is one of the start and (limit-1) names, |
|
136 * and if not, exclude them from the actual binary search. |
|
137 */ |
|
138 if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, &startPrefixLength)) { |
|
139 return 0; |
|
140 } |
|
141 ++start; |
|
142 --limit; |
|
143 if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, &limitPrefixLength)) { |
|
144 return limit; |
|
145 } |
|
146 while(start<limit) { |
|
147 int32_t i=(start+limit)/2; |
|
148 int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength); |
|
149 int32_t cmp=strcmpAfterPrefix(s, names+toc[i].nameOffset, &prefixLength); |
|
150 if(cmp<0) { |
|
151 limit=i; |
|
152 limitPrefixLength=prefixLength; |
|
153 } else if(cmp==0) { |
|
154 return i; |
|
155 } else { |
|
156 start=i+1; |
|
157 startPrefixLength=prefixLength; |
|
158 } |
|
159 } |
|
160 return -1; |
|
161 } |
|
162 |
|
163 static int32_t |
|
164 pointerTOCPrefixBinarySearch(const char *s, const PointerTOCEntry *toc, int32_t count) { |
|
165 int32_t start=0; |
|
166 int32_t limit=count; |
|
167 /* |
|
168 * Remember the shared prefix between s, start and limit, |
|
169 * and don't compare that shared prefix again. |
|
170 * The shared prefix should get longer as we narrow the [start, limit[ range. |
|
171 */ |
|
172 int32_t startPrefixLength=0; |
|
173 int32_t limitPrefixLength=0; |
|
174 if(count==0) { |
|
175 return -1; |
|
176 } |
|
177 /* |
|
178 * Prime the prefix lengths so that we don't keep prefixLength at 0 until |
|
179 * both the start and limit indexes have moved. |
|
180 * At the same time, we find if s is one of the start and (limit-1) names, |
|
181 * and if not, exclude them from the actual binary search. |
|
182 */ |
|
183 if(0==strcmpAfterPrefix(s, toc[0].entryName, &startPrefixLength)) { |
|
184 return 0; |
|
185 } |
|
186 ++start; |
|
187 --limit; |
|
188 if(0==strcmpAfterPrefix(s, toc[limit].entryName, &limitPrefixLength)) { |
|
189 return limit; |
|
190 } |
|
191 while(start<limit) { |
|
192 int32_t i=(start+limit)/2; |
|
193 int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength); |
|
194 int32_t cmp=strcmpAfterPrefix(s, toc[i].entryName, &prefixLength); |
|
195 if(cmp<0) { |
|
196 limit=i; |
|
197 limitPrefixLength=prefixLength; |
|
198 } else if(cmp==0) { |
|
199 return i; |
|
200 } else { |
|
201 start=i+1; |
|
202 startPrefixLength=prefixLength; |
|
203 } |
|
204 } |
|
205 return -1; |
|
206 } |
|
207 |
|
208 static uint32_t offsetTOCEntryCount(const UDataMemory *pData) { |
|
209 int32_t retVal=0; |
|
210 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; |
|
211 if (toc != NULL) { |
|
212 retVal = toc->count; |
|
213 } |
|
214 return retVal; |
|
215 } |
|
216 |
|
217 static const DataHeader * |
|
218 offsetTOCLookupFn(const UDataMemory *pData, |
|
219 const char *tocEntryName, |
|
220 int32_t *pLength, |
|
221 UErrorCode *pErrorCode) { |
|
222 const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; |
|
223 if(toc!=NULL) { |
|
224 const char *base=(const char *)toc; |
|
225 int32_t number, count=(int32_t)toc->count; |
|
226 |
|
227 /* perform a binary search for the data in the common data's table of contents */ |
|
228 #if defined (UDATA_DEBUG_DUMP) |
|
229 /* list the contents of the TOC each time .. not recommended */ |
|
230 for(number=0; number<count; ++number) { |
|
231 fprintf(stderr, "\tx%d: %s\n", number, &base[toc->entry[number].nameOffset]); |
|
232 } |
|
233 #endif |
|
234 number=offsetTOCPrefixBinarySearch(tocEntryName, base, toc->entry, count); |
|
235 if(number>=0) { |
|
236 /* found it */ |
|
237 const UDataOffsetTOCEntry *entry=toc->entry+number; |
|
238 #ifdef UDATA_DEBUG |
|
239 fprintf(stderr, "%s: Found.\n", tocEntryName); |
|
240 #endif |
|
241 if((number+1) < count) { |
|
242 *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset); |
|
243 } else { |
|
244 *pLength = -1; |
|
245 } |
|
246 return (const DataHeader *)(base+entry->dataOffset); |
|
247 } else { |
|
248 #ifdef UDATA_DEBUG |
|
249 fprintf(stderr, "%s: Not found.\n", tocEntryName); |
|
250 #endif |
|
251 return NULL; |
|
252 } |
|
253 } else { |
|
254 #ifdef UDATA_DEBUG |
|
255 fprintf(stderr, "returning header\n"); |
|
256 #endif |
|
257 |
|
258 return pData->pHeader; |
|
259 } |
|
260 } |
|
261 |
|
262 |
|
263 static uint32_t pointerTOCEntryCount(const UDataMemory *pData) { |
|
264 const PointerTOC *toc = (PointerTOC *)pData->toc; |
|
265 return (uint32_t)((toc != NULL) ? (toc->count) : 0); |
|
266 } |
|
267 |
|
268 |
|
269 static const DataHeader *pointerTOCLookupFn(const UDataMemory *pData, |
|
270 const char *name, |
|
271 int32_t *pLength, |
|
272 UErrorCode *pErrorCode) { |
|
273 if(pData->toc!=NULL) { |
|
274 const PointerTOC *toc = (PointerTOC *)pData->toc; |
|
275 int32_t number, count=(int32_t)toc->count; |
|
276 |
|
277 #if defined (UDATA_DEBUG_DUMP) |
|
278 /* list the contents of the TOC each time .. not recommended */ |
|
279 for(number=0; number<count; ++number) { |
|
280 fprintf(stderr, "\tx%d: %s\n", number, toc->entry[number].entryName); |
|
281 } |
|
282 #endif |
|
283 number=pointerTOCPrefixBinarySearch(name, toc->entry, count); |
|
284 if(number>=0) { |
|
285 /* found it */ |
|
286 #ifdef UDATA_DEBUG |
|
287 fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName); |
|
288 #endif |
|
289 *pLength=-1; |
|
290 return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader); |
|
291 } else { |
|
292 #ifdef UDATA_DEBUG |
|
293 fprintf(stderr, "%s: Not found.\n", name); |
|
294 #endif |
|
295 return NULL; |
|
296 } |
|
297 } else { |
|
298 return pData->pHeader; |
|
299 } |
|
300 } |
|
301 |
|
302 static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount}; |
|
303 static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount}; |
|
304 |
|
305 |
|
306 |
|
307 /*----------------------------------------------------------------------* |
|
308 * * |
|
309 * checkCommonData Validate the format of a common data file. * |
|
310 * Fill in the virtual function ptr based on TOC type * |
|
311 * If the data is invalid, close the UDataMemory * |
|
312 * and set the appropriate error code. * |
|
313 * * |
|
314 *----------------------------------------------------------------------*/ |
|
315 U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) { |
|
316 if (U_FAILURE(*err)) { |
|
317 return; |
|
318 } |
|
319 |
|
320 if(udm==NULL || udm->pHeader==NULL) { |
|
321 *err=U_INVALID_FORMAT_ERROR; |
|
322 } else if(!(udm->pHeader->dataHeader.magic1==0xda && |
|
323 udm->pHeader->dataHeader.magic2==0x27 && |
|
324 udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN && |
|
325 udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY) |
|
326 ) { |
|
327 /* header not valid */ |
|
328 *err=U_INVALID_FORMAT_ERROR; |
|
329 } |
|
330 else if (udm->pHeader->info.dataFormat[0]==0x43 && |
|
331 udm->pHeader->info.dataFormat[1]==0x6d && |
|
332 udm->pHeader->info.dataFormat[2]==0x6e && |
|
333 udm->pHeader->info.dataFormat[3]==0x44 && |
|
334 udm->pHeader->info.formatVersion[0]==1 |
|
335 ) { |
|
336 /* dataFormat="CmnD" */ |
|
337 udm->vFuncs = &CmnDFuncs; |
|
338 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); |
|
339 } |
|
340 else if(udm->pHeader->info.dataFormat[0]==0x54 && |
|
341 udm->pHeader->info.dataFormat[1]==0x6f && |
|
342 udm->pHeader->info.dataFormat[2]==0x43 && |
|
343 udm->pHeader->info.dataFormat[3]==0x50 && |
|
344 udm->pHeader->info.formatVersion[0]==1 |
|
345 ) { |
|
346 /* dataFormat="ToCP" */ |
|
347 udm->vFuncs = &ToCPFuncs; |
|
348 udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); |
|
349 } |
|
350 else { |
|
351 /* dataFormat not recognized */ |
|
352 *err=U_INVALID_FORMAT_ERROR; |
|
353 } |
|
354 |
|
355 if (U_FAILURE(*err)) { |
|
356 /* If the data is no good and we memory-mapped it ourselves, |
|
357 * close the memory mapping so it doesn't leak. Note that this has |
|
358 * no effect on non-memory mapped data, other than clearing fields in udm. |
|
359 */ |
|
360 udata_close(udm); |
|
361 } |
|
362 } |
|
363 |
|
364 /* |
|
365 * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package |
|
366 * header but not its sub-items. |
|
367 * This function will be needed for automatic runtime swapping. |
|
368 * Sub-items should not be swapped to limit the swapping to the parts of the |
|
369 * package that are actually used. |
|
370 * |
|
371 * Since lengths of items are implicit in the order and offsets of their |
|
372 * ToC entries, and since offsets are relative to the start of the ToC, |
|
373 * a swapped version may need to generate a different data structure |
|
374 * with pointers to the original data items and with their lengths |
|
375 * (-1 for the last one if it is not known), and maybe even pointers to the |
|
376 * swapped versions of the items. |
|
377 * These pointers to swapped versions would establish a cache; |
|
378 * instead, each open data item could simply own the storage for its swapped |
|
379 * data. This fits better with the current design. |
|
380 * |
|
381 * markus 2003sep18 Jitterbug 2235 |
|
382 */ |