intl/icu/source/tools/genrb/rle.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/genrb/rle.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,405 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2000-2003, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*
    1.12 +* File writejava.c
    1.13 +*
    1.14 +* Modification History:
    1.15 +*
    1.16 +*   Date        Name        Description
    1.17 +*   01/11/02    Ram        Creation.
    1.18 +*******************************************************************************
    1.19 +*/
    1.20 +#include "rle.h"
    1.21 +/**
    1.22 + * The ESCAPE character is used during run-length encoding.  It signals
    1.23 + * a run of identical chars.
    1.24 + */
    1.25 +static const uint16_t ESCAPE = 0xA5A5;
    1.26 +
    1.27 +/**
    1.28 + * The ESCAPE_BYTE character is used during run-length encoding.  It signals
    1.29 + * a run of identical bytes.
    1.30 + */
    1.31 +static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
    1.32 +
    1.33 +/**
    1.34 + * Append a byte to the given StringBuffer, packing two bytes into each
    1.35 + * character.  The state parameter maintains intermediary data between
    1.36 + * calls.
    1.37 + * @param state A two-element array, with state[0] == 0 if this is the
    1.38 + * first byte of a pair, or state[0] != 0 if this is the second byte
    1.39 + * of a pair, in which case state[1] is the first byte.
    1.40 + */
    1.41 +static uint16_t*
    1.42 +appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
    1.43 +    if(!status || U_FAILURE(*status)){
    1.44 +        return NULL;
    1.45 +    }
    1.46 +    if (state[0] != 0) {
    1.47 +        uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
    1.48 +        if(buffer < buffLimit){
    1.49 +            *buffer++ = c;
    1.50 +        }else{
    1.51 +            *status = U_BUFFER_OVERFLOW_ERROR;
    1.52 +        }
    1.53 +        state[0] = 0;
    1.54 +        return buffer;
    1.55 +    }
    1.56 +    else {
    1.57 +        state[0] = 1;
    1.58 +        state[1] = value;
    1.59 +        return buffer;
    1.60 +    }
    1.61 +}
    1.62 +/**
    1.63 + * Encode a run, possibly a degenerate run (of < 4 values).
    1.64 + * @param length The length of the run; must be > 0 && <= 0xFF.
    1.65 + */
    1.66 +static uint16_t*
    1.67 +encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
    1.68 +    if(!status || U_FAILURE(*status)){
    1.69 +        return NULL;
    1.70 +    }
    1.71 +    if (length < 4) {
    1.72 +        int32_t j=0;
    1.73 +        for (; j<length; ++j) {
    1.74 +            if (value == ESCAPE_BYTE) {
    1.75 +                buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
    1.76 +            }
    1.77 +            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
    1.78 +        }
    1.79 +    }
    1.80 +    else {
    1.81 +        if (length == ESCAPE_BYTE) {
    1.82 +            if (value == ESCAPE_BYTE){
    1.83 +               buffer =  appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
    1.84 +            }
    1.85 +            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
    1.86 +            --length;
    1.87 +        }
    1.88 +        buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
    1.89 +        buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
    1.90 +        buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
    1.91 +    }
    1.92 +    return buffer;
    1.93 +}
    1.94 +
    1.95 +#define APPEND( buffer, bufLimit, value, num, status){  \
    1.96 +    if(buffer<bufLimit){                    \
    1.97 +        *buffer++=(value);                  \
    1.98 +    }else{                                  \
    1.99 +        *status = U_BUFFER_OVERFLOW_ERROR;  \
   1.100 +    }                                       \
   1.101 +    num++;                                  \
   1.102 +}
   1.103 +
   1.104 +/**
   1.105 + * Encode a run, possibly a degenerate run (of < 4 values).
   1.106 + * @param length The length of the run; must be > 0 && <= 0xFFFF.
   1.107 + */
   1.108 +static uint16_t*
   1.109 +encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
   1.110 +    int32_t num=0;
   1.111 +    if (length < 4) {
   1.112 +        int j=0;
   1.113 +        for (; j<length; ++j) {
   1.114 +            if (value == (int32_t) ESCAPE){
   1.115 +                APPEND(buffer,bufLimit,ESCAPE, num, status);
   1.116 +
   1.117 +            }
   1.118 +            APPEND(buffer,bufLimit,value,num, status);
   1.119 +        }
   1.120 +    }
   1.121 +    else {
   1.122 +        if (length == (int32_t) ESCAPE) {
   1.123 +            if (value == (int32_t) ESCAPE){
   1.124 +                APPEND(buffer,bufLimit,ESCAPE,num,status);
   1.125 +
   1.126 +            }
   1.127 +            APPEND(buffer,bufLimit,value,num,status);
   1.128 +            --length;
   1.129 +        }
   1.130 +        APPEND(buffer,bufLimit,ESCAPE,num,status);
   1.131 +        APPEND(buffer,bufLimit,(uint16_t) length, num,status);
   1.132 +        APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
   1.133 +    }
   1.134 +    return buffer;
   1.135 +}
   1.136 +
   1.137 +/**
   1.138 + * Construct a string representing a char array.  Use run-length encoding.
   1.139 + * A character represents itself, unless it is the ESCAPE character.  Then
   1.140 + * the following notations are possible:
   1.141 + *   ESCAPE ESCAPE   ESCAPE literal
   1.142 + *   ESCAPE n c      n instances of character c
   1.143 + * Since an encoded run occupies 3 characters, we only encode runs of 4 or
   1.144 + * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
   1.145 + * If we encounter a run where n == ESCAPE, we represent this as:
   1.146 + *   c ESCAPE n-1 c
   1.147 + * The ESCAPE value is chosen so as not to collide with commonly
   1.148 + * seen values.
   1.149 + */
   1.150 +int32_t
   1.151 +usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
   1.152 +    uint16_t* bufLimit =  buffer+bufLen;
   1.153 +    uint16_t* saveBuffer = buffer;
   1.154 +    if(buffer < bufLimit){
   1.155 +        *buffer++ =  (uint16_t)(srcLen>>16);
   1.156 +        if(buffer<bufLimit){
   1.157 +            uint16_t runValue = src[0];
   1.158 +            int32_t runLength = 1;
   1.159 +            int i=1;
   1.160 +            *buffer++ = (uint16_t) srcLen;
   1.161 +
   1.162 +            for (; i<srcLen; ++i) {
   1.163 +                uint16_t s = src[i];
   1.164 +                if (s == runValue && runLength < 0xFFFF){
   1.165 +                    ++runLength;
   1.166 +                }else {
   1.167 +                    buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
   1.168 +                    runValue = s;
   1.169 +                    runLength = 1;
   1.170 +                }
   1.171 +            }
   1.172 +            buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
   1.173 +        }else{
   1.174 +            *status = U_BUFFER_OVERFLOW_ERROR;
   1.175 +        }
   1.176 +    }else{
   1.177 +        *status = U_BUFFER_OVERFLOW_ERROR;
   1.178 +    }
   1.179 +    return (int32_t)(buffer - saveBuffer);
   1.180 +}
   1.181 +
   1.182 +/**
   1.183 + * Construct a string representing a byte array.  Use run-length encoding.
   1.184 + * Two bytes are packed into a single char, with a single extra zero byte at
   1.185 + * the end if needed.  A byte represents itself, unless it is the
   1.186 + * ESCAPE_BYTE.  Then the following notations are possible:
   1.187 + *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
   1.188 + *   ESCAPE_BYTE n b           n instances of byte b
   1.189 + * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
   1.190 + * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
   1.191 + * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
   1.192 + *   b ESCAPE_BYTE n-1 b
   1.193 + * The ESCAPE_BYTE value is chosen so as not to collide with commonly
   1.194 + * seen values.
   1.195 + */
   1.196 +int32_t
   1.197 +byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
   1.198 +    const uint16_t* saveBuf = buffer;
   1.199 +    uint16_t* bufLimit =  buffer+bufLen;
   1.200 +    if(buffer < bufLimit){
   1.201 +        *buffer++ = ((uint16_t) (srcLen >> 16));
   1.202 +
   1.203 +        if(buffer<bufLimit){
   1.204 +            uint8_t runValue = src[0];
   1.205 +            int runLength = 1;
   1.206 +            uint8_t state[2]= {0};
   1.207 +            int i=1;
   1.208 +            *buffer++=((uint16_t) srcLen);
   1.209 +            for (; i<srcLen; ++i) {
   1.210 +                uint8_t b = src[i];
   1.211 +                if (b == runValue && runLength < 0xFF){
   1.212 +                    ++runLength;
   1.213 +                }
   1.214 +                else {
   1.215 +                    buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
   1.216 +                    runValue = b;
   1.217 +                    runLength = 1;
   1.218 +                }
   1.219 +            }
   1.220 +            buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
   1.221 +
   1.222 +            /* We must save the final byte, if there is one, by padding
   1.223 +             * an extra zero.
   1.224 +             */
   1.225 +            if (state[0] != 0) {
   1.226 +                buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
   1.227 +            }
   1.228 +        }else{
   1.229 +            *status = U_BUFFER_OVERFLOW_ERROR;
   1.230 +        }
   1.231 +    }else{
   1.232 +        *status = U_BUFFER_OVERFLOW_ERROR;
   1.233 +    }
   1.234 +    return (int32_t) (buffer - saveBuf);
   1.235 +}
   1.236 +
   1.237 +
   1.238 +/**
   1.239 + * Construct an array of shorts from a run-length encoded string.
   1.240 + */
   1.241 +int32_t
   1.242 +rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
   1.243 +    int32_t length = 0;
   1.244 +    int32_t ai = 0;
   1.245 +    int i=2;
   1.246 +
   1.247 +    if(!status || U_FAILURE(*status)){
   1.248 +        return 0;
   1.249 +    }
   1.250 +    /* the source is null terminated */
   1.251 +    if(srcLen == -1){
   1.252 +        srcLen = u_strlen(src);
   1.253 +    }
   1.254 +    if(srcLen <= 2){
   1.255 +        return 2;
   1.256 +    }
   1.257 +    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
   1.258 +
   1.259 +    if(target == NULL){
   1.260 +        return length;
   1.261 +    }
   1.262 +    if(tgtLen < length){
   1.263 +        *status = U_BUFFER_OVERFLOW_ERROR;
   1.264 +        return length;
   1.265 +    }
   1.266 +
   1.267 +    for (; i<srcLen; ++i) {
   1.268 +        uint16_t c = src[i];
   1.269 +        if (c == ESCAPE) {
   1.270 +            c = src[++i];
   1.271 +            if (c == ESCAPE) {
   1.272 +                target[ai++] = c;
   1.273 +            } else {
   1.274 +                int32_t runLength = (int32_t) c;
   1.275 +                uint16_t runValue = src[++i];
   1.276 +                int j=0;
   1.277 +                for (; j<runLength; ++j) {
   1.278 +                    target[ai++] = runValue;
   1.279 +                }
   1.280 +            }
   1.281 +        }
   1.282 +        else {
   1.283 +            target[ai++] = c;
   1.284 +        }
   1.285 +    }
   1.286 +
   1.287 +    if (ai != length){
   1.288 +        *status = U_INTERNAL_PROGRAM_ERROR;
   1.289 +    }
   1.290 +
   1.291 +    return length;
   1.292 +}
   1.293 +
   1.294 +/**
   1.295 + * Construct an array of bytes from a run-length encoded string.
   1.296 + */
   1.297 +int32_t
   1.298 +rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
   1.299 +
   1.300 +    int32_t length = 0;
   1.301 +    UBool nextChar = TRUE;
   1.302 +    uint16_t c = 0;
   1.303 +    int32_t node = 0;
   1.304 +    int32_t runLength = 0;
   1.305 +    int32_t i = 2;
   1.306 +    int32_t ai=0;
   1.307 +
   1.308 +    if(!status || U_FAILURE(*status)){
   1.309 +        return 0;
   1.310 +    }
   1.311 +    /* the source is null terminated */
   1.312 +    if(srcLen == -1){
   1.313 +        srcLen = u_strlen(src);
   1.314 +    }
   1.315 +    if(srcLen <= 2){
   1.316 +        return 2;
   1.317 +    }
   1.318 +    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
   1.319 +
   1.320 +    if(target == NULL){
   1.321 +        return length;
   1.322 +    }
   1.323 +    if(tgtLen < length){
   1.324 +        *status = U_BUFFER_OVERFLOW_ERROR;
   1.325 +        return length;
   1.326 +    }
   1.327 +
   1.328 +    for (; ai<tgtLen; ) {
   1.329 +       /* This part of the loop places the next byte into the local
   1.330 +        * variable 'b' each time through the loop.  It keeps the
   1.331 +        * current character in 'c' and uses the boolean 'nextChar'
   1.332 +        * to see if we've taken both bytes out of 'c' yet.
   1.333 +        */
   1.334 +        uint8_t b;
   1.335 +        if (nextChar) {
   1.336 +            c = src[i++];
   1.337 +            b = (uint8_t) (c >> 8);
   1.338 +            nextChar = FALSE;
   1.339 +        }
   1.340 +        else {
   1.341 +            b = (uint8_t) (c & 0xFF);
   1.342 +            nextChar = TRUE;
   1.343 +        }
   1.344 +
   1.345 +       /* This part of the loop is a tiny state machine which handles
   1.346 +        * the parsing of the run-length encoding.  This would be simpler
   1.347 +        * if we could look ahead, but we can't, so we use 'node' to
   1.348 +        * move between three nodes in the state machine.
   1.349 +        */
   1.350 +        switch (node) {
   1.351 +        case 0:
   1.352 +            /* Normal idle node */
   1.353 +            if (b == ESCAPE_BYTE) {
   1.354 +                node = 1;
   1.355 +            }
   1.356 +            else {
   1.357 +                target[ai++] = b;
   1.358 +            }
   1.359 +            break;
   1.360 +        case 1:
   1.361 +           /* We have seen one ESCAPE_BYTE; we expect either a second
   1.362 +            * one, or a run length and value.
   1.363 +            */
   1.364 +            if (b == ESCAPE_BYTE) {
   1.365 +                target[ai++] = ESCAPE_BYTE;
   1.366 +                node = 0;
   1.367 +            }
   1.368 +            else {
   1.369 +                runLength = b;
   1.370 +                node = 2;
   1.371 +            }
   1.372 +            break;
   1.373 +        case 2:
   1.374 +            {
   1.375 +                int j=0;
   1.376 +               /* We have seen an ESCAPE_BYTE and length byte.  We interpret
   1.377 +                * the next byte as the value to be repeated.
   1.378 +                */
   1.379 +                for (; j<runLength; ++j){
   1.380 +                    if(ai<tgtLen){
   1.381 +                        target[ai++] = b;
   1.382 +                    }else{
   1.383 +                        *status = U_BUFFER_OVERFLOW_ERROR;
   1.384 +                        return ai;
   1.385 +                    }
   1.386 +                }
   1.387 +                node = 0;
   1.388 +                break;
   1.389 +            }
   1.390 +        }
   1.391 +    }
   1.392 +
   1.393 +    if (node != 0){
   1.394 +        *status = U_INTERNAL_PROGRAM_ERROR;
   1.395 +        /*("Bad run-length encoded byte array")*/
   1.396 +        return 0;
   1.397 +    }
   1.398 +
   1.399 +
   1.400 +    if (i != srcLen){
   1.401 +        /*("Excess data in RLE byte array string");*/
   1.402 +        *status = U_INTERNAL_PROGRAM_ERROR;
   1.403 +        return ai;
   1.404 +    }
   1.405 +
   1.406 +    return ai;
   1.407 +}
   1.408 +

mercurial