1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/titletrn.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,168 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2001-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* Date Name Description 1.10 +* 05/24/01 aliu Creation. 1.11 +********************************************************************** 1.12 +*/ 1.13 + 1.14 +#include "unicode/utypes.h" 1.15 + 1.16 +#if !UCONFIG_NO_TRANSLITERATION 1.17 + 1.18 +#include "unicode/uchar.h" 1.19 +#include "unicode/uniset.h" 1.20 +#include "unicode/ustring.h" 1.21 +#include "unicode/utf16.h" 1.22 +#include "titletrn.h" 1.23 +#include "umutex.h" 1.24 +#include "ucase.h" 1.25 +#include "cpputils.h" 1.26 + 1.27 +U_NAMESPACE_BEGIN 1.28 + 1.29 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator) 1.30 + 1.31 +TitlecaseTransliterator::TitlecaseTransliterator() : 1.32 + CaseMapTransliterator(UNICODE_STRING("Any-Title", 9), NULL) 1.33 +{ 1.34 + // Need to look back 2 characters in the case of "can't" 1.35 + setMaximumContextLength(2); 1.36 +} 1.37 + 1.38 +/** 1.39 + * Destructor. 1.40 + */ 1.41 +TitlecaseTransliterator::~TitlecaseTransliterator() { 1.42 +} 1.43 + 1.44 +/** 1.45 + * Copy constructor. 1.46 + */ 1.47 +TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) : 1.48 + CaseMapTransliterator(o) 1.49 +{ 1.50 +} 1.51 + 1.52 +/** 1.53 + * Assignment operator. 1.54 + */ 1.55 +/*TitlecaseTransliterator& TitlecaseTransliterator::operator=( 1.56 + const TitlecaseTransliterator& o) { 1.57 + CaseMapTransliterator::operator=(o); 1.58 + return *this; 1.59 +}*/ 1.60 + 1.61 +/** 1.62 + * Transliterator API. 1.63 + */ 1.64 +Transliterator* TitlecaseTransliterator::clone(void) const { 1.65 + return new TitlecaseTransliterator(*this); 1.66 +} 1.67 + 1.68 +/** 1.69 + * Implements {@link Transliterator#handleTransliterate}. 1.70 + */ 1.71 +void TitlecaseTransliterator::handleTransliterate( 1.72 + Replaceable& text, UTransPosition& offsets, 1.73 + UBool isIncremental) const 1.74 +{ 1.75 + // TODO reimplement, see ustrcase.c 1.76 + // using a real word break iterator 1.77 + // instead of just looking for a transition between cased and uncased characters 1.78 + // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap) 1.79 + // needs to take isIncremental into account because case mappings are context-sensitive 1.80 + // also detect when lowercasing function did not finish because of context 1.81 + 1.82 + if (offsets.start >= offsets.limit) { 1.83 + return; 1.84 + } 1.85 + 1.86 + // case type: >0 cased (UCASE_LOWER etc.) ==0 uncased <0 case-ignorable 1.87 + int32_t type; 1.88 + 1.89 + // Our mode; we are either converting letter toTitle or 1.90 + // toLower. 1.91 + UBool doTitle = TRUE; 1.92 + 1.93 + // Determine if there is a preceding context of cased case-ignorable*, 1.94 + // in which case we want to start in toLower mode. If the 1.95 + // prior context is anything else (including empty) then start 1.96 + // in toTitle mode. 1.97 + UChar32 c; 1.98 + int32_t start; 1.99 + for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) { 1.100 + c = text.char32At(start); 1.101 + type=ucase_getTypeOrIgnorable(fCsp, c); 1.102 + if(type>0) { // cased 1.103 + doTitle=FALSE; 1.104 + break; 1.105 + } else if(type==0) { // uncased but not ignorable 1.106 + break; 1.107 + } 1.108 + // else (type<0) case-ignorable: continue 1.109 + } 1.110 + 1.111 + // Convert things after a cased character toLower; things 1.112 + // after an uncased, non-case-ignorable character toTitle. Case-ignorable 1.113 + // characters are copied directly and do not change the mode. 1.114 + UCaseContext csc; 1.115 + uprv_memset(&csc, 0, sizeof(csc)); 1.116 + csc.p = &text; 1.117 + csc.start = offsets.contextStart; 1.118 + csc.limit = offsets.contextLimit; 1.119 + 1.120 + UnicodeString tmp; 1.121 + const UChar *s; 1.122 + int32_t textPos, delta, result, locCache=0; 1.123 + 1.124 + for(textPos=offsets.start; textPos<offsets.limit;) { 1.125 + csc.cpStart=textPos; 1.126 + c=text.char32At(textPos); 1.127 + csc.cpLimit=textPos+=U16_LENGTH(c); 1.128 + 1.129 + type=ucase_getTypeOrIgnorable(fCsp, c); 1.130 + if(type>=0) { // not case-ignorable 1.131 + if(doTitle) { 1.132 + result=ucase_toFullTitle(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache); 1.133 + } else { 1.134 + result=ucase_toFullLower(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache); 1.135 + } 1.136 + doTitle = (UBool)(type==0); // doTitle=isUncased 1.137 + 1.138 + if(csc.b1 && isIncremental) { 1.139 + // fMap() tried to look beyond the context limit 1.140 + // wait for more input 1.141 + offsets.start=csc.cpStart; 1.142 + return; 1.143 + } 1.144 + 1.145 + if(result>=0) { 1.146 + // replace the current code point with its full case mapping result 1.147 + // see UCASE_MAX_STRING_LENGTH 1.148 + if(result<=UCASE_MAX_STRING_LENGTH) { 1.149 + // string s[result] 1.150 + tmp.setTo(FALSE, s, result); 1.151 + delta=result-U16_LENGTH(c); 1.152 + } else { 1.153 + // single code point 1.154 + tmp.setTo(result); 1.155 + delta=tmp.length()-U16_LENGTH(c); 1.156 + } 1.157 + text.handleReplaceBetween(csc.cpStart, textPos, tmp); 1.158 + if(delta!=0) { 1.159 + textPos+=delta; 1.160 + csc.limit=offsets.contextLimit+=delta; 1.161 + offsets.limit+=delta; 1.162 + } 1.163 + } 1.164 + } 1.165 + } 1.166 + offsets.start=textPos; 1.167 +} 1.168 + 1.169 +U_NAMESPACE_END 1.170 + 1.171 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */