intl/icu/source/tools/genrb/rle.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2000-2003, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 *
michael@0 9 * File writejava.c
michael@0 10 *
michael@0 11 * Modification History:
michael@0 12 *
michael@0 13 * Date Name Description
michael@0 14 * 01/11/02 Ram Creation.
michael@0 15 *******************************************************************************
michael@0 16 */
michael@0 17 #include "rle.h"
michael@0 18 /**
michael@0 19 * The ESCAPE character is used during run-length encoding. It signals
michael@0 20 * a run of identical chars.
michael@0 21 */
michael@0 22 static const uint16_t ESCAPE = 0xA5A5;
michael@0 23
michael@0 24 /**
michael@0 25 * The ESCAPE_BYTE character is used during run-length encoding. It signals
michael@0 26 * a run of identical bytes.
michael@0 27 */
michael@0 28 static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
michael@0 29
michael@0 30 /**
michael@0 31 * Append a byte to the given StringBuffer, packing two bytes into each
michael@0 32 * character. The state parameter maintains intermediary data between
michael@0 33 * calls.
michael@0 34 * @param state A two-element array, with state[0] == 0 if this is the
michael@0 35 * first byte of a pair, or state[0] != 0 if this is the second byte
michael@0 36 * of a pair, in which case state[1] is the first byte.
michael@0 37 */
michael@0 38 static uint16_t*
michael@0 39 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
michael@0 40 if(!status || U_FAILURE(*status)){
michael@0 41 return NULL;
michael@0 42 }
michael@0 43 if (state[0] != 0) {
michael@0 44 uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
michael@0 45 if(buffer < buffLimit){
michael@0 46 *buffer++ = c;
michael@0 47 }else{
michael@0 48 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 49 }
michael@0 50 state[0] = 0;
michael@0 51 return buffer;
michael@0 52 }
michael@0 53 else {
michael@0 54 state[0] = 1;
michael@0 55 state[1] = value;
michael@0 56 return buffer;
michael@0 57 }
michael@0 58 }
michael@0 59 /**
michael@0 60 * Encode a run, possibly a degenerate run (of < 4 values).
michael@0 61 * @param length The length of the run; must be > 0 && <= 0xFF.
michael@0 62 */
michael@0 63 static uint16_t*
michael@0 64 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
michael@0 65 if(!status || U_FAILURE(*status)){
michael@0 66 return NULL;
michael@0 67 }
michael@0 68 if (length < 4) {
michael@0 69 int32_t j=0;
michael@0 70 for (; j<length; ++j) {
michael@0 71 if (value == ESCAPE_BYTE) {
michael@0 72 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
michael@0 73 }
michael@0 74 buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
michael@0 75 }
michael@0 76 }
michael@0 77 else {
michael@0 78 if (length == ESCAPE_BYTE) {
michael@0 79 if (value == ESCAPE_BYTE){
michael@0 80 buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
michael@0 81 }
michael@0 82 buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
michael@0 83 --length;
michael@0 84 }
michael@0 85 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
michael@0 86 buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
michael@0 87 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
michael@0 88 }
michael@0 89 return buffer;
michael@0 90 }
michael@0 91
michael@0 92 #define APPEND( buffer, bufLimit, value, num, status){ \
michael@0 93 if(buffer<bufLimit){ \
michael@0 94 *buffer++=(value); \
michael@0 95 }else{ \
michael@0 96 *status = U_BUFFER_OVERFLOW_ERROR; \
michael@0 97 } \
michael@0 98 num++; \
michael@0 99 }
michael@0 100
michael@0 101 /**
michael@0 102 * Encode a run, possibly a degenerate run (of < 4 values).
michael@0 103 * @param length The length of the run; must be > 0 && <= 0xFFFF.
michael@0 104 */
michael@0 105 static uint16_t*
michael@0 106 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
michael@0 107 int32_t num=0;
michael@0 108 if (length < 4) {
michael@0 109 int j=0;
michael@0 110 for (; j<length; ++j) {
michael@0 111 if (value == (int32_t) ESCAPE){
michael@0 112 APPEND(buffer,bufLimit,ESCAPE, num, status);
michael@0 113
michael@0 114 }
michael@0 115 APPEND(buffer,bufLimit,value,num, status);
michael@0 116 }
michael@0 117 }
michael@0 118 else {
michael@0 119 if (length == (int32_t) ESCAPE) {
michael@0 120 if (value == (int32_t) ESCAPE){
michael@0 121 APPEND(buffer,bufLimit,ESCAPE,num,status);
michael@0 122
michael@0 123 }
michael@0 124 APPEND(buffer,bufLimit,value,num,status);
michael@0 125 --length;
michael@0 126 }
michael@0 127 APPEND(buffer,bufLimit,ESCAPE,num,status);
michael@0 128 APPEND(buffer,bufLimit,(uint16_t) length, num,status);
michael@0 129 APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
michael@0 130 }
michael@0 131 return buffer;
michael@0 132 }
michael@0 133
michael@0 134 /**
michael@0 135 * Construct a string representing a char array. Use run-length encoding.
michael@0 136 * A character represents itself, unless it is the ESCAPE character. Then
michael@0 137 * the following notations are possible:
michael@0 138 * ESCAPE ESCAPE ESCAPE literal
michael@0 139 * ESCAPE n c n instances of character c
michael@0 140 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
michael@0 141 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
michael@0 142 * If we encounter a run where n == ESCAPE, we represent this as:
michael@0 143 * c ESCAPE n-1 c
michael@0 144 * The ESCAPE value is chosen so as not to collide with commonly
michael@0 145 * seen values.
michael@0 146 */
michael@0 147 int32_t
michael@0 148 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
michael@0 149 uint16_t* bufLimit = buffer+bufLen;
michael@0 150 uint16_t* saveBuffer = buffer;
michael@0 151 if(buffer < bufLimit){
michael@0 152 *buffer++ = (uint16_t)(srcLen>>16);
michael@0 153 if(buffer<bufLimit){
michael@0 154 uint16_t runValue = src[0];
michael@0 155 int32_t runLength = 1;
michael@0 156 int i=1;
michael@0 157 *buffer++ = (uint16_t) srcLen;
michael@0 158
michael@0 159 for (; i<srcLen; ++i) {
michael@0 160 uint16_t s = src[i];
michael@0 161 if (s == runValue && runLength < 0xFFFF){
michael@0 162 ++runLength;
michael@0 163 }else {
michael@0 164 buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
michael@0 165 runValue = s;
michael@0 166 runLength = 1;
michael@0 167 }
michael@0 168 }
michael@0 169 buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
michael@0 170 }else{
michael@0 171 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 172 }
michael@0 173 }else{
michael@0 174 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 175 }
michael@0 176 return (int32_t)(buffer - saveBuffer);
michael@0 177 }
michael@0 178
michael@0 179 /**
michael@0 180 * Construct a string representing a byte array. Use run-length encoding.
michael@0 181 * Two bytes are packed into a single char, with a single extra zero byte at
michael@0 182 * the end if needed. A byte represents itself, unless it is the
michael@0 183 * ESCAPE_BYTE. Then the following notations are possible:
michael@0 184 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
michael@0 185 * ESCAPE_BYTE n b n instances of byte b
michael@0 186 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
michael@0 187 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
michael@0 188 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
michael@0 189 * b ESCAPE_BYTE n-1 b
michael@0 190 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
michael@0 191 * seen values.
michael@0 192 */
michael@0 193 int32_t
michael@0 194 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
michael@0 195 const uint16_t* saveBuf = buffer;
michael@0 196 uint16_t* bufLimit = buffer+bufLen;
michael@0 197 if(buffer < bufLimit){
michael@0 198 *buffer++ = ((uint16_t) (srcLen >> 16));
michael@0 199
michael@0 200 if(buffer<bufLimit){
michael@0 201 uint8_t runValue = src[0];
michael@0 202 int runLength = 1;
michael@0 203 uint8_t state[2]= {0};
michael@0 204 int i=1;
michael@0 205 *buffer++=((uint16_t) srcLen);
michael@0 206 for (; i<srcLen; ++i) {
michael@0 207 uint8_t b = src[i];
michael@0 208 if (b == runValue && runLength < 0xFF){
michael@0 209 ++runLength;
michael@0 210 }
michael@0 211 else {
michael@0 212 buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
michael@0 213 runValue = b;
michael@0 214 runLength = 1;
michael@0 215 }
michael@0 216 }
michael@0 217 buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
michael@0 218
michael@0 219 /* We must save the final byte, if there is one, by padding
michael@0 220 * an extra zero.
michael@0 221 */
michael@0 222 if (state[0] != 0) {
michael@0 223 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
michael@0 224 }
michael@0 225 }else{
michael@0 226 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 227 }
michael@0 228 }else{
michael@0 229 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 230 }
michael@0 231 return (int32_t) (buffer - saveBuf);
michael@0 232 }
michael@0 233
michael@0 234
michael@0 235 /**
michael@0 236 * Construct an array of shorts from a run-length encoded string.
michael@0 237 */
michael@0 238 int32_t
michael@0 239 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
michael@0 240 int32_t length = 0;
michael@0 241 int32_t ai = 0;
michael@0 242 int i=2;
michael@0 243
michael@0 244 if(!status || U_FAILURE(*status)){
michael@0 245 return 0;
michael@0 246 }
michael@0 247 /* the source is null terminated */
michael@0 248 if(srcLen == -1){
michael@0 249 srcLen = u_strlen(src);
michael@0 250 }
michael@0 251 if(srcLen <= 2){
michael@0 252 return 2;
michael@0 253 }
michael@0 254 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
michael@0 255
michael@0 256 if(target == NULL){
michael@0 257 return length;
michael@0 258 }
michael@0 259 if(tgtLen < length){
michael@0 260 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 261 return length;
michael@0 262 }
michael@0 263
michael@0 264 for (; i<srcLen; ++i) {
michael@0 265 uint16_t c = src[i];
michael@0 266 if (c == ESCAPE) {
michael@0 267 c = src[++i];
michael@0 268 if (c == ESCAPE) {
michael@0 269 target[ai++] = c;
michael@0 270 } else {
michael@0 271 int32_t runLength = (int32_t) c;
michael@0 272 uint16_t runValue = src[++i];
michael@0 273 int j=0;
michael@0 274 for (; j<runLength; ++j) {
michael@0 275 target[ai++] = runValue;
michael@0 276 }
michael@0 277 }
michael@0 278 }
michael@0 279 else {
michael@0 280 target[ai++] = c;
michael@0 281 }
michael@0 282 }
michael@0 283
michael@0 284 if (ai != length){
michael@0 285 *status = U_INTERNAL_PROGRAM_ERROR;
michael@0 286 }
michael@0 287
michael@0 288 return length;
michael@0 289 }
michael@0 290
michael@0 291 /**
michael@0 292 * Construct an array of bytes from a run-length encoded string.
michael@0 293 */
michael@0 294 int32_t
michael@0 295 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
michael@0 296
michael@0 297 int32_t length = 0;
michael@0 298 UBool nextChar = TRUE;
michael@0 299 uint16_t c = 0;
michael@0 300 int32_t node = 0;
michael@0 301 int32_t runLength = 0;
michael@0 302 int32_t i = 2;
michael@0 303 int32_t ai=0;
michael@0 304
michael@0 305 if(!status || U_FAILURE(*status)){
michael@0 306 return 0;
michael@0 307 }
michael@0 308 /* the source is null terminated */
michael@0 309 if(srcLen == -1){
michael@0 310 srcLen = u_strlen(src);
michael@0 311 }
michael@0 312 if(srcLen <= 2){
michael@0 313 return 2;
michael@0 314 }
michael@0 315 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
michael@0 316
michael@0 317 if(target == NULL){
michael@0 318 return length;
michael@0 319 }
michael@0 320 if(tgtLen < length){
michael@0 321 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 322 return length;
michael@0 323 }
michael@0 324
michael@0 325 for (; ai<tgtLen; ) {
michael@0 326 /* This part of the loop places the next byte into the local
michael@0 327 * variable 'b' each time through the loop. It keeps the
michael@0 328 * current character in 'c' and uses the boolean 'nextChar'
michael@0 329 * to see if we've taken both bytes out of 'c' yet.
michael@0 330 */
michael@0 331 uint8_t b;
michael@0 332 if (nextChar) {
michael@0 333 c = src[i++];
michael@0 334 b = (uint8_t) (c >> 8);
michael@0 335 nextChar = FALSE;
michael@0 336 }
michael@0 337 else {
michael@0 338 b = (uint8_t) (c & 0xFF);
michael@0 339 nextChar = TRUE;
michael@0 340 }
michael@0 341
michael@0 342 /* This part of the loop is a tiny state machine which handles
michael@0 343 * the parsing of the run-length encoding. This would be simpler
michael@0 344 * if we could look ahead, but we can't, so we use 'node' to
michael@0 345 * move between three nodes in the state machine.
michael@0 346 */
michael@0 347 switch (node) {
michael@0 348 case 0:
michael@0 349 /* Normal idle node */
michael@0 350 if (b == ESCAPE_BYTE) {
michael@0 351 node = 1;
michael@0 352 }
michael@0 353 else {
michael@0 354 target[ai++] = b;
michael@0 355 }
michael@0 356 break;
michael@0 357 case 1:
michael@0 358 /* We have seen one ESCAPE_BYTE; we expect either a second
michael@0 359 * one, or a run length and value.
michael@0 360 */
michael@0 361 if (b == ESCAPE_BYTE) {
michael@0 362 target[ai++] = ESCAPE_BYTE;
michael@0 363 node = 0;
michael@0 364 }
michael@0 365 else {
michael@0 366 runLength = b;
michael@0 367 node = 2;
michael@0 368 }
michael@0 369 break;
michael@0 370 case 2:
michael@0 371 {
michael@0 372 int j=0;
michael@0 373 /* We have seen an ESCAPE_BYTE and length byte. We interpret
michael@0 374 * the next byte as the value to be repeated.
michael@0 375 */
michael@0 376 for (; j<runLength; ++j){
michael@0 377 if(ai<tgtLen){
michael@0 378 target[ai++] = b;
michael@0 379 }else{
michael@0 380 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 381 return ai;
michael@0 382 }
michael@0 383 }
michael@0 384 node = 0;
michael@0 385 break;
michael@0 386 }
michael@0 387 }
michael@0 388 }
michael@0 389
michael@0 390 if (node != 0){
michael@0 391 *status = U_INTERNAL_PROGRAM_ERROR;
michael@0 392 /*("Bad run-length encoded byte array")*/
michael@0 393 return 0;
michael@0 394 }
michael@0 395
michael@0 396
michael@0 397 if (i != srcLen){
michael@0 398 /*("Excess data in RLE byte array string");*/
michael@0 399 *status = U_INTERNAL_PROGRAM_ERROR;
michael@0 400 return ai;
michael@0 401 }
michael@0 402
michael@0 403 return ai;
michael@0 404 }
michael@0 405

mercurial