|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 2000-2003, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * |
|
9 * File writejava.c |
|
10 * |
|
11 * Modification History: |
|
12 * |
|
13 * Date Name Description |
|
14 * 01/11/02 Ram Creation. |
|
15 ******************************************************************************* |
|
16 */ |
|
17 #include "rle.h" |
|
/**
 * Escape marker for the 16-bit run-length encoding. Inside an encoded
 * stream it introduces either an escaped literal (ESCAPE ESCAPE) or a run
 * of identical values (ESCAPE, count, value).
 */
static const uint16_t ESCAPE = (uint16_t)0xA5A5;

/**
 * Escape marker for the byte-oriented run-length encoding. It plays the
 * same role as ESCAPE, but for 8-bit values: it introduces either an
 * escaped literal byte or a run of identical bytes.
 */
static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
|
29 |
|
30 /** |
|
31 * Append a byte to the given StringBuffer, packing two bytes into each |
|
32 * character. The state parameter maintains intermediary data between |
|
33 * calls. |
|
34 * @param state A two-element array, with state[0] == 0 if this is the |
|
35 * first byte of a pair, or state[0] != 0 if this is the second byte |
|
36 * of a pair, in which case state[1] is the first byte. |
|
37 */ |
|
38 static uint16_t* |
|
39 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) { |
|
40 if(!status || U_FAILURE(*status)){ |
|
41 return NULL; |
|
42 } |
|
43 if (state[0] != 0) { |
|
44 uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF)); |
|
45 if(buffer < buffLimit){ |
|
46 *buffer++ = c; |
|
47 }else{ |
|
48 *status = U_BUFFER_OVERFLOW_ERROR; |
|
49 } |
|
50 state[0] = 0; |
|
51 return buffer; |
|
52 } |
|
53 else { |
|
54 state[0] = 1; |
|
55 state[1] = value; |
|
56 return buffer; |
|
57 } |
|
58 } |
|
59 /** |
|
60 * Encode a run, possibly a degenerate run (of < 4 values). |
|
61 * @param length The length of the run; must be > 0 && <= 0xFF. |
|
62 */ |
|
63 static uint16_t* |
|
64 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) { |
|
65 if(!status || U_FAILURE(*status)){ |
|
66 return NULL; |
|
67 } |
|
68 if (length < 4) { |
|
69 int32_t j=0; |
|
70 for (; j<length; ++j) { |
|
71 if (value == ESCAPE_BYTE) { |
|
72 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); |
|
73 } |
|
74 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); |
|
75 } |
|
76 } |
|
77 else { |
|
78 if (length == ESCAPE_BYTE) { |
|
79 if (value == ESCAPE_BYTE){ |
|
80 buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status); |
|
81 } |
|
82 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); |
|
83 --length; |
|
84 } |
|
85 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); |
|
86 buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status); |
|
87 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/ |
|
88 } |
|
89 return buffer; |
|
90 } |
|
91 |
|
/**
 * Store one 16-bit unit at the write position and advance it, or set
 * *status to U_BUFFER_OVERFLOW_ERROR when the position has reached
 * bufLimit. The counter num is incremented in both cases.
 *
 * buffer and num must be modifiable lvalues. The body is wrapped in
 * do { } while (0) so that "APPEND(...);" is a single statement and can be
 * used safely in an unbraced if/else.
 */
#define APPEND(buffer, bufLimit, value, num, status) \
    do { \
        if ((buffer) < (bufLimit)) { \
            *(buffer)++ = (value); \
        } else { \
            *(status) = U_BUFFER_OVERFLOW_ERROR; \
        } \
        (num)++; \
    } while (0)
|
100 |
|
101 /** |
|
102 * Encode a run, possibly a degenerate run (of < 4 values). |
|
103 * @param length The length of the run; must be > 0 && <= 0xFFFF. |
|
104 */ |
|
105 static uint16_t* |
|
106 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) { |
|
107 int32_t num=0; |
|
108 if (length < 4) { |
|
109 int j=0; |
|
110 for (; j<length; ++j) { |
|
111 if (value == (int32_t) ESCAPE){ |
|
112 APPEND(buffer,bufLimit,ESCAPE, num, status); |
|
113 |
|
114 } |
|
115 APPEND(buffer,bufLimit,value,num, status); |
|
116 } |
|
117 } |
|
118 else { |
|
119 if (length == (int32_t) ESCAPE) { |
|
120 if (value == (int32_t) ESCAPE){ |
|
121 APPEND(buffer,bufLimit,ESCAPE,num,status); |
|
122 |
|
123 } |
|
124 APPEND(buffer,bufLimit,value,num,status); |
|
125 --length; |
|
126 } |
|
127 APPEND(buffer,bufLimit,ESCAPE,num,status); |
|
128 APPEND(buffer,bufLimit,(uint16_t) length, num,status); |
|
129 APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */ |
|
130 } |
|
131 return buffer; |
|
132 } |
|
133 |
|
134 /** |
|
135 * Construct a string representing a char array. Use run-length encoding. |
|
136 * A character represents itself, unless it is the ESCAPE character. Then |
|
137 * the following notations are possible: |
|
138 * ESCAPE ESCAPE ESCAPE literal |
|
139 * ESCAPE n c n instances of character c |
|
140 * Since an encoded run occupies 3 characters, we only encode runs of 4 or |
|
141 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. |
|
142 * If we encounter a run where n == ESCAPE, we represent this as: |
|
143 * c ESCAPE n-1 c |
|
144 * The ESCAPE value is chosen so as not to collide with commonly |
|
145 * seen values. |
|
146 */ |
|
147 int32_t |
|
148 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) { |
|
149 uint16_t* bufLimit = buffer+bufLen; |
|
150 uint16_t* saveBuffer = buffer; |
|
151 if(buffer < bufLimit){ |
|
152 *buffer++ = (uint16_t)(srcLen>>16); |
|
153 if(buffer<bufLimit){ |
|
154 uint16_t runValue = src[0]; |
|
155 int32_t runLength = 1; |
|
156 int i=1; |
|
157 *buffer++ = (uint16_t) srcLen; |
|
158 |
|
159 for (; i<srcLen; ++i) { |
|
160 uint16_t s = src[i]; |
|
161 if (s == runValue && runLength < 0xFFFF){ |
|
162 ++runLength; |
|
163 }else { |
|
164 buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status); |
|
165 runValue = s; |
|
166 runLength = 1; |
|
167 } |
|
168 } |
|
169 buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status); |
|
170 }else{ |
|
171 *status = U_BUFFER_OVERFLOW_ERROR; |
|
172 } |
|
173 }else{ |
|
174 *status = U_BUFFER_OVERFLOW_ERROR; |
|
175 } |
|
176 return (int32_t)(buffer - saveBuffer); |
|
177 } |
|
178 |
|
179 /** |
|
180 * Construct a string representing a byte array. Use run-length encoding. |
|
181 * Two bytes are packed into a single char, with a single extra zero byte at |
|
182 * the end if needed. A byte represents itself, unless it is the |
|
183 * ESCAPE_BYTE. Then the following notations are possible: |
|
184 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal |
|
185 * ESCAPE_BYTE n b n instances of byte b |
|
186 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or |
|
187 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. |
|
188 * If we encounter a run where n == ESCAPE_BYTE, we represent this as: |
|
189 * b ESCAPE_BYTE n-1 b |
|
190 * The ESCAPE_BYTE value is chosen so as not to collide with commonly |
|
191 * seen values. |
|
192 */ |
|
193 int32_t |
|
194 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) { |
|
195 const uint16_t* saveBuf = buffer; |
|
196 uint16_t* bufLimit = buffer+bufLen; |
|
197 if(buffer < bufLimit){ |
|
198 *buffer++ = ((uint16_t) (srcLen >> 16)); |
|
199 |
|
200 if(buffer<bufLimit){ |
|
201 uint8_t runValue = src[0]; |
|
202 int runLength = 1; |
|
203 uint8_t state[2]= {0}; |
|
204 int i=1; |
|
205 *buffer++=((uint16_t) srcLen); |
|
206 for (; i<srcLen; ++i) { |
|
207 uint8_t b = src[i]; |
|
208 if (b == runValue && runLength < 0xFF){ |
|
209 ++runLength; |
|
210 } |
|
211 else { |
|
212 buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status); |
|
213 runValue = b; |
|
214 runLength = 1; |
|
215 } |
|
216 } |
|
217 buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status); |
|
218 |
|
219 /* We must save the final byte, if there is one, by padding |
|
220 * an extra zero. |
|
221 */ |
|
222 if (state[0] != 0) { |
|
223 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status); |
|
224 } |
|
225 }else{ |
|
226 *status = U_BUFFER_OVERFLOW_ERROR; |
|
227 } |
|
228 }else{ |
|
229 *status = U_BUFFER_OVERFLOW_ERROR; |
|
230 } |
|
231 return (int32_t) (buffer - saveBuf); |
|
232 } |
|
233 |
|
234 |
|
235 /** |
|
236 * Construct an array of shorts from a run-length encoded string. |
|
237 */ |
|
238 int32_t |
|
239 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) { |
|
240 int32_t length = 0; |
|
241 int32_t ai = 0; |
|
242 int i=2; |
|
243 |
|
244 if(!status || U_FAILURE(*status)){ |
|
245 return 0; |
|
246 } |
|
247 /* the source is null terminated */ |
|
248 if(srcLen == -1){ |
|
249 srcLen = u_strlen(src); |
|
250 } |
|
251 if(srcLen <= 2){ |
|
252 return 2; |
|
253 } |
|
254 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); |
|
255 |
|
256 if(target == NULL){ |
|
257 return length; |
|
258 } |
|
259 if(tgtLen < length){ |
|
260 *status = U_BUFFER_OVERFLOW_ERROR; |
|
261 return length; |
|
262 } |
|
263 |
|
264 for (; i<srcLen; ++i) { |
|
265 uint16_t c = src[i]; |
|
266 if (c == ESCAPE) { |
|
267 c = src[++i]; |
|
268 if (c == ESCAPE) { |
|
269 target[ai++] = c; |
|
270 } else { |
|
271 int32_t runLength = (int32_t) c; |
|
272 uint16_t runValue = src[++i]; |
|
273 int j=0; |
|
274 for (; j<runLength; ++j) { |
|
275 target[ai++] = runValue; |
|
276 } |
|
277 } |
|
278 } |
|
279 else { |
|
280 target[ai++] = c; |
|
281 } |
|
282 } |
|
283 |
|
284 if (ai != length){ |
|
285 *status = U_INTERNAL_PROGRAM_ERROR; |
|
286 } |
|
287 |
|
288 return length; |
|
289 } |
|
290 |
|
291 /** |
|
292 * Construct an array of bytes from a run-length encoded string. |
|
293 */ |
|
294 int32_t |
|
295 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) { |
|
296 |
|
297 int32_t length = 0; |
|
298 UBool nextChar = TRUE; |
|
299 uint16_t c = 0; |
|
300 int32_t node = 0; |
|
301 int32_t runLength = 0; |
|
302 int32_t i = 2; |
|
303 int32_t ai=0; |
|
304 |
|
305 if(!status || U_FAILURE(*status)){ |
|
306 return 0; |
|
307 } |
|
308 /* the source is null terminated */ |
|
309 if(srcLen == -1){ |
|
310 srcLen = u_strlen(src); |
|
311 } |
|
312 if(srcLen <= 2){ |
|
313 return 2; |
|
314 } |
|
315 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); |
|
316 |
|
317 if(target == NULL){ |
|
318 return length; |
|
319 } |
|
320 if(tgtLen < length){ |
|
321 *status = U_BUFFER_OVERFLOW_ERROR; |
|
322 return length; |
|
323 } |
|
324 |
|
325 for (; ai<tgtLen; ) { |
|
326 /* This part of the loop places the next byte into the local |
|
327 * variable 'b' each time through the loop. It keeps the |
|
328 * current character in 'c' and uses the boolean 'nextChar' |
|
329 * to see if we've taken both bytes out of 'c' yet. |
|
330 */ |
|
331 uint8_t b; |
|
332 if (nextChar) { |
|
333 c = src[i++]; |
|
334 b = (uint8_t) (c >> 8); |
|
335 nextChar = FALSE; |
|
336 } |
|
337 else { |
|
338 b = (uint8_t) (c & 0xFF); |
|
339 nextChar = TRUE; |
|
340 } |
|
341 |
|
342 /* This part of the loop is a tiny state machine which handles |
|
343 * the parsing of the run-length encoding. This would be simpler |
|
344 * if we could look ahead, but we can't, so we use 'node' to |
|
345 * move between three nodes in the state machine. |
|
346 */ |
|
347 switch (node) { |
|
348 case 0: |
|
349 /* Normal idle node */ |
|
350 if (b == ESCAPE_BYTE) { |
|
351 node = 1; |
|
352 } |
|
353 else { |
|
354 target[ai++] = b; |
|
355 } |
|
356 break; |
|
357 case 1: |
|
358 /* We have seen one ESCAPE_BYTE; we expect either a second |
|
359 * one, or a run length and value. |
|
360 */ |
|
361 if (b == ESCAPE_BYTE) { |
|
362 target[ai++] = ESCAPE_BYTE; |
|
363 node = 0; |
|
364 } |
|
365 else { |
|
366 runLength = b; |
|
367 node = 2; |
|
368 } |
|
369 break; |
|
370 case 2: |
|
371 { |
|
372 int j=0; |
|
373 /* We have seen an ESCAPE_BYTE and length byte. We interpret |
|
374 * the next byte as the value to be repeated. |
|
375 */ |
|
376 for (; j<runLength; ++j){ |
|
377 if(ai<tgtLen){ |
|
378 target[ai++] = b; |
|
379 }else{ |
|
380 *status = U_BUFFER_OVERFLOW_ERROR; |
|
381 return ai; |
|
382 } |
|
383 } |
|
384 node = 0; |
|
385 break; |
|
386 } |
|
387 } |
|
388 } |
|
389 |
|
390 if (node != 0){ |
|
391 *status = U_INTERNAL_PROGRAM_ERROR; |
|
392 /*("Bad run-length encoded byte array")*/ |
|
393 return 0; |
|
394 } |
|
395 |
|
396 |
|
397 if (i != srcLen){ |
|
398 /*("Excess data in RLE byte array string");*/ |
|
399 *status = U_INTERNAL_PROGRAM_ERROR; |
|
400 return ai; |
|
401 } |
|
402 |
|
403 return ai; |
|
404 } |
|
405 |