intl/icu/source/tools/genrb/rle.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2000-2003, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *
     9 * File writejava.c
    10 *
    11 * Modification History:
    12 *
    13 *   Date        Name        Description
    14 *   01/11/02    Ram        Creation.
    15 *******************************************************************************
    16 */
    17 #include "rle.h"
    18 /**
    19  * The ESCAPE character is used during run-length encoding.  It signals
    20  * a run of identical chars.
    21  */
    22 static const uint16_t ESCAPE = 0xA5A5;
    24 /**
    25  * The ESCAPE_BYTE character is used during run-length encoding.  It signals
    26  * a run of identical bytes.
    27  */
    28 static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
    30 /**
    31  * Append a byte to the given StringBuffer, packing two bytes into each
    32  * character.  The state parameter maintains intermediary data between
    33  * calls.
    34  * @param state A two-element array, with state[0] == 0 if this is the
    35  * first byte of a pair, or state[0] != 0 if this is the second byte
    36  * of a pair, in which case state[1] is the first byte.
    37  */
    38 static uint16_t*
    39 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
    40     if(!status || U_FAILURE(*status)){
    41         return NULL;
    42     }
    43     if (state[0] != 0) {
    44         uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
    45         if(buffer < buffLimit){
    46             *buffer++ = c;
    47         }else{
    48             *status = U_BUFFER_OVERFLOW_ERROR;
    49         }
    50         state[0] = 0;
    51         return buffer;
    52     }
    53     else {
    54         state[0] = 1;
    55         state[1] = value;
    56         return buffer;
    57     }
    58 }
    59 /**
    60  * Encode a run, possibly a degenerate run (of < 4 values).
    61  * @param length The length of the run; must be > 0 && <= 0xFF.
    62  */
    63 static uint16_t*
    64 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
    65     if(!status || U_FAILURE(*status)){
    66         return NULL;
    67     }
    68     if (length < 4) {
    69         int32_t j=0;
    70         for (; j<length; ++j) {
    71             if (value == ESCAPE_BYTE) {
    72                 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
    73             }
    74             buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
    75         }
    76     }
    77     else {
    78         if (length == ESCAPE_BYTE) {
    79             if (value == ESCAPE_BYTE){
    80                buffer =  appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
    81             }
    82             buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
    83             --length;
    84         }
    85         buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
    86         buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
    87         buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
    88     }
    89     return buffer;
    90 }
    92 #define APPEND( buffer, bufLimit, value, num, status){  \
    93     if(buffer<bufLimit){                    \
    94         *buffer++=(value);                  \
    95     }else{                                  \
    96         *status = U_BUFFER_OVERFLOW_ERROR;  \
    97     }                                       \
    98     num++;                                  \
    99 }
   101 /**
   102  * Encode a run, possibly a degenerate run (of < 4 values).
   103  * @param length The length of the run; must be > 0 && <= 0xFFFF.
   104  */
   105 static uint16_t*
   106 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
   107     int32_t num=0;
   108     if (length < 4) {
   109         int j=0;
   110         for (; j<length; ++j) {
   111             if (value == (int32_t) ESCAPE){
   112                 APPEND(buffer,bufLimit,ESCAPE, num, status);
   114             }
   115             APPEND(buffer,bufLimit,value,num, status);
   116         }
   117     }
   118     else {
   119         if (length == (int32_t) ESCAPE) {
   120             if (value == (int32_t) ESCAPE){
   121                 APPEND(buffer,bufLimit,ESCAPE,num,status);
   123             }
   124             APPEND(buffer,bufLimit,value,num,status);
   125             --length;
   126         }
   127         APPEND(buffer,bufLimit,ESCAPE,num,status);
   128         APPEND(buffer,bufLimit,(uint16_t) length, num,status);
   129         APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
   130     }
   131     return buffer;
   132 }
   134 /**
   135  * Construct a string representing a char array.  Use run-length encoding.
   136  * A character represents itself, unless it is the ESCAPE character.  Then
   137  * the following notations are possible:
   138  *   ESCAPE ESCAPE   ESCAPE literal
   139  *   ESCAPE n c      n instances of character c
   140  * Since an encoded run occupies 3 characters, we only encode runs of 4 or
   141  * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
   142  * If we encounter a run where n == ESCAPE, we represent this as:
   143  *   c ESCAPE n-1 c
   144  * The ESCAPE value is chosen so as not to collide with commonly
   145  * seen values.
   146  */
   147 int32_t
   148 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
   149     uint16_t* bufLimit =  buffer+bufLen;
   150     uint16_t* saveBuffer = buffer;
   151     if(buffer < bufLimit){
   152         *buffer++ =  (uint16_t)(srcLen>>16);
   153         if(buffer<bufLimit){
   154             uint16_t runValue = src[0];
   155             int32_t runLength = 1;
   156             int i=1;
   157             *buffer++ = (uint16_t) srcLen;
   159             for (; i<srcLen; ++i) {
   160                 uint16_t s = src[i];
   161                 if (s == runValue && runLength < 0xFFFF){
   162                     ++runLength;
   163                 }else {
   164                     buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
   165                     runValue = s;
   166                     runLength = 1;
   167                 }
   168             }
   169             buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
   170         }else{
   171             *status = U_BUFFER_OVERFLOW_ERROR;
   172         }
   173     }else{
   174         *status = U_BUFFER_OVERFLOW_ERROR;
   175     }
   176     return (int32_t)(buffer - saveBuffer);
   177 }
   179 /**
   180  * Construct a string representing a byte array.  Use run-length encoding.
   181  * Two bytes are packed into a single char, with a single extra zero byte at
   182  * the end if needed.  A byte represents itself, unless it is the
   183  * ESCAPE_BYTE.  Then the following notations are possible:
   184  *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
   185  *   ESCAPE_BYTE n b           n instances of byte b
   186  * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
   187  * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
   188  * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
   189  *   b ESCAPE_BYTE n-1 b
   190  * The ESCAPE_BYTE value is chosen so as not to collide with commonly
   191  * seen values.
   192  */
   193 int32_t
   194 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
   195     const uint16_t* saveBuf = buffer;
   196     uint16_t* bufLimit =  buffer+bufLen;
   197     if(buffer < bufLimit){
   198         *buffer++ = ((uint16_t) (srcLen >> 16));
   200         if(buffer<bufLimit){
   201             uint8_t runValue = src[0];
   202             int runLength = 1;
   203             uint8_t state[2]= {0};
   204             int i=1;
   205             *buffer++=((uint16_t) srcLen);
   206             for (; i<srcLen; ++i) {
   207                 uint8_t b = src[i];
   208                 if (b == runValue && runLength < 0xFF){
   209                     ++runLength;
   210                 }
   211                 else {
   212                     buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
   213                     runValue = b;
   214                     runLength = 1;
   215                 }
   216             }
   217             buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
   219             /* We must save the final byte, if there is one, by padding
   220              * an extra zero.
   221              */
   222             if (state[0] != 0) {
   223                 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
   224             }
   225         }else{
   226             *status = U_BUFFER_OVERFLOW_ERROR;
   227         }
   228     }else{
   229         *status = U_BUFFER_OVERFLOW_ERROR;
   230     }
   231     return (int32_t) (buffer - saveBuf);
   232 }
   235 /**
   236  * Construct an array of shorts from a run-length encoded string.
   237  */
   238 int32_t
   239 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
   240     int32_t length = 0;
   241     int32_t ai = 0;
   242     int i=2;
   244     if(!status || U_FAILURE(*status)){
   245         return 0;
   246     }
   247     /* the source is null terminated */
   248     if(srcLen == -1){
   249         srcLen = u_strlen(src);
   250     }
   251     if(srcLen <= 2){
   252         return 2;
   253     }
   254     length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
   256     if(target == NULL){
   257         return length;
   258     }
   259     if(tgtLen < length){
   260         *status = U_BUFFER_OVERFLOW_ERROR;
   261         return length;
   262     }
   264     for (; i<srcLen; ++i) {
   265         uint16_t c = src[i];
   266         if (c == ESCAPE) {
   267             c = src[++i];
   268             if (c == ESCAPE) {
   269                 target[ai++] = c;
   270             } else {
   271                 int32_t runLength = (int32_t) c;
   272                 uint16_t runValue = src[++i];
   273                 int j=0;
   274                 for (; j<runLength; ++j) {
   275                     target[ai++] = runValue;
   276                 }
   277             }
   278         }
   279         else {
   280             target[ai++] = c;
   281         }
   282     }
   284     if (ai != length){
   285         *status = U_INTERNAL_PROGRAM_ERROR;
   286     }
   288     return length;
   289 }
   291 /**
   292  * Construct an array of bytes from a run-length encoded string.
   293  */
   294 int32_t
   295 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
   297     int32_t length = 0;
   298     UBool nextChar = TRUE;
   299     uint16_t c = 0;
   300     int32_t node = 0;
   301     int32_t runLength = 0;
   302     int32_t i = 2;
   303     int32_t ai=0;
   305     if(!status || U_FAILURE(*status)){
   306         return 0;
   307     }
   308     /* the source is null terminated */
   309     if(srcLen == -1){
   310         srcLen = u_strlen(src);
   311     }
   312     if(srcLen <= 2){
   313         return 2;
   314     }
   315     length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
   317     if(target == NULL){
   318         return length;
   319     }
   320     if(tgtLen < length){
   321         *status = U_BUFFER_OVERFLOW_ERROR;
   322         return length;
   323     }
   325     for (; ai<tgtLen; ) {
   326        /* This part of the loop places the next byte into the local
   327         * variable 'b' each time through the loop.  It keeps the
   328         * current character in 'c' and uses the boolean 'nextChar'
   329         * to see if we've taken both bytes out of 'c' yet.
   330         */
   331         uint8_t b;
   332         if (nextChar) {
   333             c = src[i++];
   334             b = (uint8_t) (c >> 8);
   335             nextChar = FALSE;
   336         }
   337         else {
   338             b = (uint8_t) (c & 0xFF);
   339             nextChar = TRUE;
   340         }
   342        /* This part of the loop is a tiny state machine which handles
   343         * the parsing of the run-length encoding.  This would be simpler
   344         * if we could look ahead, but we can't, so we use 'node' to
   345         * move between three nodes in the state machine.
   346         */
   347         switch (node) {
   348         case 0:
   349             /* Normal idle node */
   350             if (b == ESCAPE_BYTE) {
   351                 node = 1;
   352             }
   353             else {
   354                 target[ai++] = b;
   355             }
   356             break;
   357         case 1:
   358            /* We have seen one ESCAPE_BYTE; we expect either a second
   359             * one, or a run length and value.
   360             */
   361             if (b == ESCAPE_BYTE) {
   362                 target[ai++] = ESCAPE_BYTE;
   363                 node = 0;
   364             }
   365             else {
   366                 runLength = b;
   367                 node = 2;
   368             }
   369             break;
   370         case 2:
   371             {
   372                 int j=0;
   373                /* We have seen an ESCAPE_BYTE and length byte.  We interpret
   374                 * the next byte as the value to be repeated.
   375                 */
   376                 for (; j<runLength; ++j){
   377                     if(ai<tgtLen){
   378                         target[ai++] = b;
   379                     }else{
   380                         *status = U_BUFFER_OVERFLOW_ERROR;
   381                         return ai;
   382                     }
   383                 }
   384                 node = 0;
   385                 break;
   386             }
   387         }
   388     }
   390     if (node != 0){
   391         *status = U_INTERNAL_PROGRAM_ERROR;
   392         /*("Bad run-length encoded byte array")*/
   393         return 0;
   394     }
   397     if (i != srcLen){
   398         /*("Excess data in RLE byte array string");*/
   399         *status = U_INTERNAL_PROGRAM_ERROR;
   400         return ai;
   401     }
   403     return ai;
   404 }

mercurial