intl/icu/source/i18n/titletrn.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 2001-2011, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 *   Date        Name        Description
     7 *   05/24/01    aliu        Creation.
     8 **********************************************************************
     9 */
    11 #include "unicode/utypes.h"
    13 #if !UCONFIG_NO_TRANSLITERATION
    15 #include "unicode/uchar.h"
    16 #include "unicode/uniset.h"
    17 #include "unicode/ustring.h"
    18 #include "unicode/utf16.h"
    19 #include "titletrn.h"
    20 #include "umutex.h"
    21 #include "ucase.h"
    22 #include "cpputils.h"
    24 U_NAMESPACE_BEGIN
    26 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator)
    28 TitlecaseTransliterator::TitlecaseTransliterator() :
    29     CaseMapTransliterator(UNICODE_STRING("Any-Title", 9), NULL)
    30 {
    31     // Need to look back 2 characters in the case of "can't"
    32     setMaximumContextLength(2);
    33 }
    35 /**
    36  * Destructor.
    37  */
    38 TitlecaseTransliterator::~TitlecaseTransliterator() {
    39 }
    41 /**
    42  * Copy constructor.
    43  */
    44 TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
    45     CaseMapTransliterator(o)
    46 {
    47 }
    49 /**
    50  * Assignment operator.
    51  */
    52 /*TitlecaseTransliterator& TitlecaseTransliterator::operator=(
    53                              const TitlecaseTransliterator& o) {
    54     CaseMapTransliterator::operator=(o);
    55     return *this;
    56 }*/
    58 /**
    59  * Transliterator API.
    60  */
    61 Transliterator* TitlecaseTransliterator::clone(void) const {
    62     return new TitlecaseTransliterator(*this);
    63 }
    65 /**
    66  * Implements {@link Transliterator#handleTransliterate}.
    67  */
    68 void TitlecaseTransliterator::handleTransliterate(
    69                                   Replaceable& text, UTransPosition& offsets,
    70                                   UBool isIncremental) const
    71 {
    72     // TODO reimplement, see ustrcase.c
    73     // using a real word break iterator
    74     //   instead of just looking for a transition between cased and uncased characters
    75     // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
    76     // needs to take isIncremental into account because case mappings are context-sensitive
    77     //   also detect when lowercasing function did not finish because of context
    79     if (offsets.start >= offsets.limit) {
    80         return;
    81     }
    83     // case type: >0 cased (UCASE_LOWER etc.)  ==0 uncased  <0 case-ignorable
    84     int32_t type;
    86     // Our mode; we are either converting letter toTitle or
    87     // toLower.
    88     UBool doTitle = TRUE;
    90     // Determine if there is a preceding context of cased case-ignorable*,
    91     // in which case we want to start in toLower mode.  If the
    92     // prior context is anything else (including empty) then start
    93     // in toTitle mode.
    94     UChar32 c;
    95     int32_t start;
    96     for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
    97         c = text.char32At(start);
    98         type=ucase_getTypeOrIgnorable(fCsp, c);
    99         if(type>0) { // cased
   100             doTitle=FALSE;
   101             break;
   102         } else if(type==0) { // uncased but not ignorable
   103             break;
   104         }
   105         // else (type<0) case-ignorable: continue
   106     }
   108     // Convert things after a cased character toLower; things
   109     // after an uncased, non-case-ignorable character toTitle.  Case-ignorable
   110     // characters are copied directly and do not change the mode.
   111     UCaseContext csc;
   112     uprv_memset(&csc, 0, sizeof(csc));
   113     csc.p = &text;
   114     csc.start = offsets.contextStart;
   115     csc.limit = offsets.contextLimit;
   117     UnicodeString tmp;
   118     const UChar *s;
   119     int32_t textPos, delta, result, locCache=0;
   121     for(textPos=offsets.start; textPos<offsets.limit;) {
   122         csc.cpStart=textPos;
   123         c=text.char32At(textPos);
   124         csc.cpLimit=textPos+=U16_LENGTH(c);
   126         type=ucase_getTypeOrIgnorable(fCsp, c);
   127         if(type>=0) { // not case-ignorable
   128             if(doTitle) {
   129                 result=ucase_toFullTitle(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
   130             } else {
   131                 result=ucase_toFullLower(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
   132             }
   133             doTitle = (UBool)(type==0); // doTitle=isUncased
   135             if(csc.b1 && isIncremental) {
   136                 // fMap() tried to look beyond the context limit
   137                 // wait for more input
   138                 offsets.start=csc.cpStart;
   139                 return;
   140             }
   142             if(result>=0) {
   143                 // replace the current code point with its full case mapping result
   144                 // see UCASE_MAX_STRING_LENGTH
   145                 if(result<=UCASE_MAX_STRING_LENGTH) {
   146                     // string s[result]
   147                     tmp.setTo(FALSE, s, result);
   148                     delta=result-U16_LENGTH(c);
   149                 } else {
   150                     // single code point
   151                     tmp.setTo(result);
   152                     delta=tmp.length()-U16_LENGTH(c);
   153                 }
   154                 text.handleReplaceBetween(csc.cpStart, textPos, tmp);
   155                 if(delta!=0) {
   156                     textPos+=delta;
   157                     csc.limit=offsets.contextLimit+=delta;
   158                     offsets.limit+=delta;
   159                 }
   160             }
   161         }
   162     }
   163     offsets.start=textPos;
   164 }
   166 U_NAMESPACE_END
   168 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

mercurial