1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/tools/genrb/rle.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,405 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2000-2003, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* 1.12 +* File writejava.c 1.13 +* 1.14 +* Modification History: 1.15 +* 1.16 +* Date Name Description 1.17 +* 01/11/02 Ram Creation. 1.18 +******************************************************************************* 1.19 +*/ 1.20 +#include "rle.h" 1.21 +/** 1.22 + * The ESCAPE character is used during run-length encoding. It signals 1.23 + * a run of identical chars. 1.24 + */ 1.25 +static const uint16_t ESCAPE = 0xA5A5; 1.26 + 1.27 +/** 1.28 + * The ESCAPE_BYTE character is used during run-length encoding. It signals 1.29 + * a run of identical bytes. 1.30 + */ 1.31 +static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5; 1.32 + 1.33 +/** 1.34 + * Append a byte to the given StringBuffer, packing two bytes into each 1.35 + * character. The state parameter maintains intermediary data between 1.36 + * calls. 1.37 + * @param state A two-element array, with state[0] == 0 if this is the 1.38 + * first byte of a pair, or state[0] != 0 if this is the second byte 1.39 + * of a pair, in which case state[1] is the first byte. 1.40 + */ 1.41 +static uint16_t* 1.42 +appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) { 1.43 + if(!status || U_FAILURE(*status)){ 1.44 + return NULL; 1.45 + } 1.46 + if (state[0] != 0) { 1.47 + uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF)); 1.48 + if(buffer < buffLimit){ 1.49 + *buffer++ = c; 1.50 + }else{ 1.51 + *status = U_BUFFER_OVERFLOW_ERROR; 1.52 + } 1.53 + state[0] = 0; 1.54 + return buffer; 1.55 + } 1.56 + else { 1.57 + state[0] = 1; 1.58 + state[1] = value; 1.59 + return buffer; 1.60 + } 1.61 +} 1.62 +/** 1.63 + * Encode a run, possibly a degenerate run (of < 4 values). 1.64 + * @param length The length of the run; must be > 0 && <= 0xFF. 1.65 + */ 1.66 +static uint16_t* 1.67 +encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) { 1.68 + if(!status || U_FAILURE(*status)){ 1.69 + return NULL; 1.70 + } 1.71 + if (length < 4) { 1.72 + int32_t j=0; 1.73 + for (; j<length; ++j) { 1.74 + if (value == ESCAPE_BYTE) { 1.75 + buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); 1.76 + } 1.77 + buffer = appendEncodedByte(buffer,bufLimit, value, state, status); 1.78 + } 1.79 + } 1.80 + else { 1.81 + if (length == ESCAPE_BYTE) { 1.82 + if (value == ESCAPE_BYTE){ 1.83 + buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status); 1.84 + } 1.85 + buffer = appendEncodedByte(buffer,bufLimit, value, state, status); 1.86 + --length; 1.87 + } 1.88 + buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); 1.89 + buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status); 1.90 + buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/ 1.91 + } 1.92 + return buffer; 1.93 +} 1.94 + 1.95 +#define APPEND( buffer, bufLimit, value, num, status){ \ 1.96 + if(buffer<bufLimit){ \ 1.97 + *buffer++=(value); \ 1.98 + }else{ \ 1.99 + *status = U_BUFFER_OVERFLOW_ERROR; \ 1.100 + } \ 1.101 + num++; \ 1.102 +} 1.103 + 1.104 +/** 1.105 + * Encode a run, possibly a degenerate run (of < 4 values). 1.106 + * @param length The length of the run; must be > 0 && <= 0xFFFF. 1.107 + */ 1.108 +static uint16_t* 1.109 +encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) { 1.110 + int32_t num=0; 1.111 + if (length < 4) { 1.112 + int j=0; 1.113 + for (; j<length; ++j) { 1.114 + if (value == (int32_t) ESCAPE){ 1.115 + APPEND(buffer,bufLimit,ESCAPE, num, status); 1.116 + 1.117 + } 1.118 + APPEND(buffer,bufLimit,value,num, status); 1.119 + } 1.120 + } 1.121 + else { 1.122 + if (length == (int32_t) ESCAPE) { 1.123 + if (value == (int32_t) ESCAPE){ 1.124 + APPEND(buffer,bufLimit,ESCAPE,num,status); 1.125 + 1.126 + } 1.127 + APPEND(buffer,bufLimit,value,num,status); 1.128 + --length; 1.129 + } 1.130 + APPEND(buffer,bufLimit,ESCAPE,num,status); 1.131 + APPEND(buffer,bufLimit,(uint16_t) length, num,status); 1.132 + APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */ 1.133 + } 1.134 + return buffer; 1.135 +} 1.136 + 1.137 +/** 1.138 + * Construct a string representing a char array. Use run-length encoding. 1.139 + * A character represents itself, unless it is the ESCAPE character. Then 1.140 + * the following notations are possible: 1.141 + * ESCAPE ESCAPE ESCAPE literal 1.142 + * ESCAPE n c n instances of character c 1.143 + * Since an encoded run occupies 3 characters, we only encode runs of 4 or 1.144 + * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 1.145 + * If we encounter a run where n == ESCAPE, we represent this as: 1.146 + * c ESCAPE n-1 c 1.147 + * The ESCAPE value is chosen so as not to collide with commonly 1.148 + * seen values. 1.149 + */ 1.150 +int32_t 1.151 +usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) { 1.152 + uint16_t* bufLimit = buffer+bufLen; 1.153 + uint16_t* saveBuffer = buffer; 1.154 + if(buffer < bufLimit){ 1.155 + *buffer++ = (uint16_t)(srcLen>>16); 1.156 + if(buffer<bufLimit){ 1.157 + uint16_t runValue = src[0]; 1.158 + int32_t runLength = 1; 1.159 + int i=1; 1.160 + *buffer++ = (uint16_t) srcLen; 1.161 + 1.162 + for (; i<srcLen; ++i) { 1.163 + uint16_t s = src[i]; 1.164 + if (s == runValue && runLength < 0xFFFF){ 1.165 + ++runLength; 1.166 + }else { 1.167 + buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status); 1.168 + runValue = s; 1.169 + runLength = 1; 1.170 + } 1.171 + } 1.172 + buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status); 1.173 + }else{ 1.174 + *status = U_BUFFER_OVERFLOW_ERROR; 1.175 + } 1.176 + }else{ 1.177 + *status = U_BUFFER_OVERFLOW_ERROR; 1.178 + } 1.179 + return (int32_t)(buffer - saveBuffer); 1.180 +} 1.181 + 1.182 +/** 1.183 + * Construct a string representing a byte array. Use run-length encoding. 1.184 + * Two bytes are packed into a single char, with a single extra zero byte at 1.185 + * the end if needed. A byte represents itself, unless it is the 1.186 + * ESCAPE_BYTE. Then the following notations are possible: 1.187 + * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal 1.188 + * ESCAPE_BYTE n b n instances of byte b 1.189 + * Since an encoded run occupies 3 bytes, we only encode runs of 4 or 1.190 + * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. 1.191 + * If we encounter a run where n == ESCAPE_BYTE, we represent this as: 1.192 + * b ESCAPE_BYTE n-1 b 1.193 + * The ESCAPE_BYTE value is chosen so as not to collide with commonly 1.194 + * seen values. 1.195 + */ 1.196 +int32_t 1.197 +byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) { 1.198 + const uint16_t* saveBuf = buffer; 1.199 + uint16_t* bufLimit = buffer+bufLen; 1.200 + if(buffer < bufLimit){ 1.201 + *buffer++ = ((uint16_t) (srcLen >> 16)); 1.202 + 1.203 + if(buffer<bufLimit){ 1.204 + uint8_t runValue = src[0]; 1.205 + int runLength = 1; 1.206 + uint8_t state[2]= {0}; 1.207 + int i=1; 1.208 + *buffer++=((uint16_t) srcLen); 1.209 + for (; i<srcLen; ++i) { 1.210 + uint8_t b = src[i]; 1.211 + if (b == runValue && runLength < 0xFF){ 1.212 + ++runLength; 1.213 + } 1.214 + else { 1.215 + buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status); 1.216 + runValue = b; 1.217 + runLength = 1; 1.218 + } 1.219 + } 1.220 + buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status); 1.221 + 1.222 + /* We must save the final byte, if there is one, by padding 1.223 + * an extra zero. 1.224 + */ 1.225 + if (state[0] != 0) { 1.226 + buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status); 1.227 + } 1.228 + }else{ 1.229 + *status = U_BUFFER_OVERFLOW_ERROR; 1.230 + } 1.231 + }else{ 1.232 + *status = U_BUFFER_OVERFLOW_ERROR; 1.233 + } 1.234 + return (int32_t) (buffer - saveBuf); 1.235 +} 1.236 + 1.237 + 1.238 +/** 1.239 + * Construct an array of shorts from a run-length encoded string. 1.240 + */ 1.241 +int32_t 1.242 +rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) { 1.243 + int32_t length = 0; 1.244 + int32_t ai = 0; 1.245 + int i=2; 1.246 + 1.247 + if(!status || U_FAILURE(*status)){ 1.248 + return 0; 1.249 + } 1.250 + /* the source is null terminated */ 1.251 + if(srcLen == -1){ 1.252 + srcLen = u_strlen(src); 1.253 + } 1.254 + if(srcLen <= 2){ 1.255 + return 2; 1.256 + } 1.257 + length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); 1.258 + 1.259 + if(target == NULL){ 1.260 + return length; 1.261 + } 1.262 + if(tgtLen < length){ 1.263 + *status = U_BUFFER_OVERFLOW_ERROR; 1.264 + return length; 1.265 + } 1.266 + 1.267 + for (; i<srcLen; ++i) { 1.268 + uint16_t c = src[i]; 1.269 + if (c == ESCAPE) { 1.270 + c = src[++i]; 1.271 + if (c == ESCAPE) { 1.272 + target[ai++] = c; 1.273 + } else { 1.274 + int32_t runLength = (int32_t) c; 1.275 + uint16_t runValue = src[++i]; 1.276 + int j=0; 1.277 + for (; j<runLength; ++j) { 1.278 + target[ai++] = runValue; 1.279 + } 1.280 + } 1.281 + } 1.282 + else { 1.283 + target[ai++] = c; 1.284 + } 1.285 + } 1.286 + 1.287 + if (ai != length){ 1.288 + *status = U_INTERNAL_PROGRAM_ERROR; 1.289 + } 1.290 + 1.291 + return length; 1.292 +} 1.293 + 1.294 +/** 1.295 + * Construct an array of bytes from a run-length encoded string. 1.296 + */ 1.297 +int32_t 1.298 +rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) { 1.299 + 1.300 + int32_t length = 0; 1.301 + UBool nextChar = TRUE; 1.302 + uint16_t c = 0; 1.303 + int32_t node = 0; 1.304 + int32_t runLength = 0; 1.305 + int32_t i = 2; 1.306 + int32_t ai=0; 1.307 + 1.308 + if(!status || U_FAILURE(*status)){ 1.309 + return 0; 1.310 + } 1.311 + /* the source is null terminated */ 1.312 + if(srcLen == -1){ 1.313 + srcLen = u_strlen(src); 1.314 + } 1.315 + if(srcLen <= 2){ 1.316 + return 2; 1.317 + } 1.318 + length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); 1.319 + 1.320 + if(target == NULL){ 1.321 + return length; 1.322 + } 1.323 + if(tgtLen < length){ 1.324 + *status = U_BUFFER_OVERFLOW_ERROR; 1.325 + return length; 1.326 + } 1.327 + 1.328 + for (; ai<tgtLen; ) { 1.329 + /* This part of the loop places the next byte into the local 1.330 + * variable 'b' each time through the loop. It keeps the 1.331 + * current character in 'c' and uses the boolean 'nextChar' 1.332 + * to see if we've taken both bytes out of 'c' yet. 1.333 + */ 1.334 + uint8_t b; 1.335 + if (nextChar) { 1.336 + c = src[i++]; 1.337 + b = (uint8_t) (c >> 8); 1.338 + nextChar = FALSE; 1.339 + } 1.340 + else { 1.341 + b = (uint8_t) (c & 0xFF); 1.342 + nextChar = TRUE; 1.343 + } 1.344 + 1.345 + /* This part of the loop is a tiny state machine which handles 1.346 + * the parsing of the run-length encoding. This would be simpler 1.347 + * if we could look ahead, but we can't, so we use 'node' to 1.348 + * move between three nodes in the state machine. 1.349 + */ 1.350 + switch (node) { 1.351 + case 0: 1.352 + /* Normal idle node */ 1.353 + if (b == ESCAPE_BYTE) { 1.354 + node = 1; 1.355 + } 1.356 + else { 1.357 + target[ai++] = b; 1.358 + } 1.359 + break; 1.360 + case 1: 1.361 + /* We have seen one ESCAPE_BYTE; we expect either a second 1.362 + * one, or a run length and value. 1.363 + */ 1.364 + if (b == ESCAPE_BYTE) { 1.365 + target[ai++] = ESCAPE_BYTE; 1.366 + node = 0; 1.367 + } 1.368 + else { 1.369 + runLength = b; 1.370 + node = 2; 1.371 + } 1.372 + break; 1.373 + case 2: 1.374 + { 1.375 + int j=0; 1.376 + /* We have seen an ESCAPE_BYTE and length byte. We interpret 1.377 + * the next byte as the value to be repeated. 1.378 + */ 1.379 + for (; j<runLength; ++j){ 1.380 + if(ai<tgtLen){ 1.381 + target[ai++] = b; 1.382 + }else{ 1.383 + *status = U_BUFFER_OVERFLOW_ERROR; 1.384 + return ai; 1.385 + } 1.386 + } 1.387 + node = 0; 1.388 + break; 1.389 + } 1.390 + } 1.391 + } 1.392 + 1.393 + if (node != 0){ 1.394 + *status = U_INTERNAL_PROGRAM_ERROR; 1.395 + /*("Bad run-length encoded byte array")*/ 1.396 + return 0; 1.397 + } 1.398 + 1.399 + 1.400 + if (i != srcLen){ 1.401 + /*("Excess data in RLE byte array string");*/ 1.402 + *status = U_INTERNAL_PROGRAM_ERROR; 1.403 + return ai; 1.404 + } 1.405 + 1.406 + return ai; 1.407 +} 1.408 +