intl/icu/source/i18n/titletrn.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 2001-2011, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * Date Name Description
michael@0 7 * 05/24/01 aliu Creation.
michael@0 8 **********************************************************************
michael@0 9 */
michael@0 10
michael@0 11 #include "unicode/utypes.h"
michael@0 12
michael@0 13 #if !UCONFIG_NO_TRANSLITERATION
michael@0 14
michael@0 15 #include "unicode/uchar.h"
michael@0 16 #include "unicode/uniset.h"
michael@0 17 #include "unicode/ustring.h"
michael@0 18 #include "unicode/utf16.h"
michael@0 19 #include "titletrn.h"
michael@0 20 #include "umutex.h"
michael@0 21 #include "ucase.h"
michael@0 22 #include "cpputils.h"
michael@0 23
michael@0 24 U_NAMESPACE_BEGIN
michael@0 25
michael@0 26 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator)
michael@0 27
michael@0 28 TitlecaseTransliterator::TitlecaseTransliterator() :
michael@0 29 CaseMapTransliterator(UNICODE_STRING("Any-Title", 9), NULL)
michael@0 30 {
michael@0 31 // Need to look back 2 characters in the case of "can't"
michael@0 32 setMaximumContextLength(2);
michael@0 33 }
michael@0 34
michael@0 35 /**
michael@0 36 * Destructor.
michael@0 37 */
michael@0 38 TitlecaseTransliterator::~TitlecaseTransliterator() {
michael@0 39 }
michael@0 40
michael@0 41 /**
michael@0 42 * Copy constructor.
michael@0 43 */
michael@0 44 TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
michael@0 45 CaseMapTransliterator(o)
michael@0 46 {
michael@0 47 }
michael@0 48
michael@0 49 /**
michael@0 50 * Assignment operator.
michael@0 51 */
michael@0 52 /*TitlecaseTransliterator& TitlecaseTransliterator::operator=(
michael@0 53 const TitlecaseTransliterator& o) {
michael@0 54 CaseMapTransliterator::operator=(o);
michael@0 55 return *this;
michael@0 56 }*/
michael@0 57
michael@0 58 /**
michael@0 59 * Transliterator API.
michael@0 60 */
michael@0 61 Transliterator* TitlecaseTransliterator::clone(void) const {
michael@0 62 return new TitlecaseTransliterator(*this);
michael@0 63 }
michael@0 64
michael@0 65 /**
michael@0 66 * Implements {@link Transliterator#handleTransliterate}.
michael@0 67 */
michael@0 68 void TitlecaseTransliterator::handleTransliterate(
michael@0 69 Replaceable& text, UTransPosition& offsets,
michael@0 70 UBool isIncremental) const
michael@0 71 {
michael@0 72 // TODO reimplement, see ustrcase.c
michael@0 73 // using a real word break iterator
michael@0 74 // instead of just looking for a transition between cased and uncased characters
michael@0 75 // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
michael@0 76 // needs to take isIncremental into account because case mappings are context-sensitive
michael@0 77 // also detect when lowercasing function did not finish because of context
michael@0 78
michael@0 79 if (offsets.start >= offsets.limit) {
michael@0 80 return;
michael@0 81 }
michael@0 82
michael@0 83 // case type: >0 cased (UCASE_LOWER etc.) ==0 uncased <0 case-ignorable
michael@0 84 int32_t type;
michael@0 85
michael@0 86 // Our mode; we are either converting letter toTitle or
michael@0 87 // toLower.
michael@0 88 UBool doTitle = TRUE;
michael@0 89
michael@0 90 // Determine if there is a preceding context of cased case-ignorable*,
michael@0 91 // in which case we want to start in toLower mode. If the
michael@0 92 // prior context is anything else (including empty) then start
michael@0 93 // in toTitle mode.
michael@0 94 UChar32 c;
michael@0 95 int32_t start;
michael@0 96 for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
michael@0 97 c = text.char32At(start);
michael@0 98 type=ucase_getTypeOrIgnorable(fCsp, c);
michael@0 99 if(type>0) { // cased
michael@0 100 doTitle=FALSE;
michael@0 101 break;
michael@0 102 } else if(type==0) { // uncased but not ignorable
michael@0 103 break;
michael@0 104 }
michael@0 105 // else (type<0) case-ignorable: continue
michael@0 106 }
michael@0 107
michael@0 108 // Convert things after a cased character toLower; things
michael@0 109 // after an uncased, non-case-ignorable character toTitle. Case-ignorable
michael@0 110 // characters are copied directly and do not change the mode.
michael@0 111 UCaseContext csc;
michael@0 112 uprv_memset(&csc, 0, sizeof(csc));
michael@0 113 csc.p = &text;
michael@0 114 csc.start = offsets.contextStart;
michael@0 115 csc.limit = offsets.contextLimit;
michael@0 116
michael@0 117 UnicodeString tmp;
michael@0 118 const UChar *s;
michael@0 119 int32_t textPos, delta, result, locCache=0;
michael@0 120
michael@0 121 for(textPos=offsets.start; textPos<offsets.limit;) {
michael@0 122 csc.cpStart=textPos;
michael@0 123 c=text.char32At(textPos);
michael@0 124 csc.cpLimit=textPos+=U16_LENGTH(c);
michael@0 125
michael@0 126 type=ucase_getTypeOrIgnorable(fCsp, c);
michael@0 127 if(type>=0) { // not case-ignorable
michael@0 128 if(doTitle) {
michael@0 129 result=ucase_toFullTitle(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
michael@0 130 } else {
michael@0 131 result=ucase_toFullLower(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
michael@0 132 }
michael@0 133 doTitle = (UBool)(type==0); // doTitle=isUncased
michael@0 134
michael@0 135 if(csc.b1 && isIncremental) {
michael@0 136 // fMap() tried to look beyond the context limit
michael@0 137 // wait for more input
michael@0 138 offsets.start=csc.cpStart;
michael@0 139 return;
michael@0 140 }
michael@0 141
michael@0 142 if(result>=0) {
michael@0 143 // replace the current code point with its full case mapping result
michael@0 144 // see UCASE_MAX_STRING_LENGTH
michael@0 145 if(result<=UCASE_MAX_STRING_LENGTH) {
michael@0 146 // string s[result]
michael@0 147 tmp.setTo(FALSE, s, result);
michael@0 148 delta=result-U16_LENGTH(c);
michael@0 149 } else {
michael@0 150 // single code point
michael@0 151 tmp.setTo(result);
michael@0 152 delta=tmp.length()-U16_LENGTH(c);
michael@0 153 }
michael@0 154 text.handleReplaceBetween(csc.cpStart, textPos, tmp);
michael@0 155 if(delta!=0) {
michael@0 156 textPos+=delta;
michael@0 157 csc.limit=offsets.contextLimit+=delta;
michael@0 158 offsets.limit+=delta;
michael@0 159 }
michael@0 160 }
michael@0 161 }
michael@0 162 }
michael@0 163 offsets.start=textPos;
michael@0 164 }
michael@0 165
michael@0 166 U_NAMESPACE_END
michael@0 167
michael@0 168 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

mercurial