intl/icu/source/i18n/bocsu.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2 *******************************************************************************
     3 *   Copyright (C) 2001-2011, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 *******************************************************************************
     6 *   file name:  bocsu.cpp
     7 *   encoding:   US-ASCII
     8 *   tab size:   8 (not used)
     9 *   indentation:4
    10 *
    11 *   Author: Markus W. Scherer
    12 *
    13 *   Modification history:
    14 *   05/18/2001  weiv    Made into separate module
    15 */
    18 #include "unicode/utypes.h"
    20 #if !UCONFIG_NO_COLLATION
    22 #include "unicode/bytestream.h"
    23 #include "unicode/utf16.h"
    24 #include "bocsu.h"
    26 /*
    27  * encode one difference value -0x10ffff..+0x10ffff in 1..3 bytes,
    28  * preserving lexical order
    29  */
    30 U_CFUNC uint8_t *
    31 u_writeDiff(int32_t diff, uint8_t *p) {
    32     if(diff>=SLOPE_REACH_NEG_1) {
    33         if(diff<=SLOPE_REACH_POS_1) {
    34             *p++=(uint8_t)(SLOPE_MIDDLE+diff);
    35         } else if(diff<=SLOPE_REACH_POS_2) {
    36             *p++=(uint8_t)(SLOPE_START_POS_2+(diff/SLOPE_TAIL_COUNT));
    37             *p++=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
    38         } else if(diff<=SLOPE_REACH_POS_3) {
    39             p[2]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
    40             diff/=SLOPE_TAIL_COUNT;
    41             p[1]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
    42             *p=(uint8_t)(SLOPE_START_POS_3+(diff/SLOPE_TAIL_COUNT));
    43             p+=3;
    44         } else {
    45             p[3]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
    46             diff/=SLOPE_TAIL_COUNT;
    47             p[2]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
    48             diff/=SLOPE_TAIL_COUNT;
    49             p[1]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
    50             *p=SLOPE_MAX;
    51             p+=4;
    52         }
    53     } else {
    54         int32_t m;
    56         if(diff>=SLOPE_REACH_NEG_2) {
    57             NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
    58             *p++=(uint8_t)(SLOPE_START_NEG_2+diff);
    59             *p++=(uint8_t)(SLOPE_MIN+m);
    60         } else if(diff>=SLOPE_REACH_NEG_3) {
    61             NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
    62             p[2]=(uint8_t)(SLOPE_MIN+m);
    63             NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
    64             p[1]=(uint8_t)(SLOPE_MIN+m);
    65             *p=(uint8_t)(SLOPE_START_NEG_3+diff);
    66             p+=3;
    67         } else {
    68             NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
    69             p[3]=(uint8_t)(SLOPE_MIN+m);
    70             NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
    71             p[2]=(uint8_t)(SLOPE_MIN+m);
    72             NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
    73             p[1]=(uint8_t)(SLOPE_MIN+m);
    74             *p=SLOPE_MIN;
    75             p+=4;
    76         }
    77     }
    78     return p;
    79 }
    81 /*
    82  * Encode the code points of a string as
    83  * a sequence of byte-encoded differences (slope detection),
    84  * preserving lexical order.
    85  *
    86  * Optimize the difference-taking for runs of Unicode text within
    87  * small scripts:
    88  *
    89  * Most small scripts are allocated within aligned 128-blocks of Unicode
    90  * code points. Lexical order is preserved if "prev" is always moved
    91  * into the middle of such a block.
    92  *
    93  * Additionally, "prev" is moved from anywhere in the Unihan
    94  * area into the middle of that area.
    95  * Note that the identical-level run in a sort key is generated from
    96  * NFD text - there are never Hangul characters included.
    97  */
    98 U_CFUNC void
    99 u_writeIdenticalLevelRun(const UChar *s, int32_t length, icu::ByteSink &sink) {
   100     char scratch[64];
   101     int32_t capacity;
   103     UChar32 prev=0;
   104     int32_t i=0;
   105     while(i<length) {
   106         char *buffer=sink.GetAppendBuffer(1, length*2, scratch, (int32_t)sizeof(scratch), &capacity);
   107         uint8_t *p;
   108         // We must have capacity>=SLOPE_MAX_BYTES in case u_writeDiff() writes that much,
   109         // but we do not want to force the sink.GetAppendBuffer() to allocate
   110         // for a large min_capacity because we might actually only write one byte.
   111         if(capacity<16) {
   112             buffer=scratch;
   113             capacity=(int32_t)sizeof(scratch);
   114         }
   115         p=reinterpret_cast<uint8_t *>(buffer);
   116         uint8_t *lastSafe=p+capacity-SLOPE_MAX_BYTES;
   117         while(i<length && p<=lastSafe) {
   118             if(prev<0x4e00 || prev>=0xa000) {
   119                 prev=(prev&~0x7f)-SLOPE_REACH_NEG_1;
   120             } else {
   121                 /*
   122                  * Unihan U+4e00..U+9fa5:
   123                  * double-bytes down from the upper end
   124                  */
   125                 prev=0x9fff-SLOPE_REACH_POS_2;
   126             }
   128             UChar32 c;
   129             U16_NEXT(s, i, length, c);
   130             p=u_writeDiff(c-prev, p);
   131             prev=c;
   132         }
   133         sink.Append(buffer, (int32_t)(p-reinterpret_cast<uint8_t *>(buffer)));
   134     }
   135 }
   137 U_CFUNC int32_t
   138 u_writeIdenticalLevelRunTwoChars(UChar32 first, UChar32 second, uint8_t *p) {
   139     uint8_t *p0 = p;
   140     if(first<0x4e00 || first>=0xa000) {
   141         first=(first&~0x7f)-SLOPE_REACH_NEG_1;
   142     } else {
   143         /*
   144          * Unihan U+4e00..U+9fa5:
   145          * double-bytes down from the upper end
   146          */
   147         first=0x9fff-SLOPE_REACH_POS_2;
   148     }
   150     p=u_writeDiff(second-first, p);
   151     return (int32_t)(p-p0);
   152 }
   154 #endif /* #if !UCONFIG_NO_COLLATION */

mercurial