michael@0: // michael@0: // Copyright (C) 2012 International Business Machines Corporation michael@0: // and others. All rights reserved. michael@0: // michael@0: // file: regeximp.cpp michael@0: // michael@0: // ICU Regular Expressions, michael@0: // miscellaneous implementation functions. michael@0: // michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_REGULAR_EXPRESSIONS michael@0: #include "regeximp.h" michael@0: #include "unicode/utf16.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: CaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) : michael@0: fUText(text), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { michael@0: fcsp = ucase_getSingleton(); michael@0: } michael@0: michael@0: CaseFoldingUTextIterator::~CaseFoldingUTextIterator() {} michael@0: michael@0: UChar32 CaseFoldingUTextIterator::next() { michael@0: UChar32 foldedC; michael@0: UChar32 originalC; michael@0: if (fFoldChars == NULL) { michael@0: // We are not in a string folding of an earlier character. michael@0: // Start handling the next char from the input UText. michael@0: originalC = UTEXT_NEXT32(&fUText); michael@0: if (originalC == U_SENTINEL) { michael@0: return originalC; michael@0: } michael@0: fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); michael@0: if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { michael@0: // input code point folds to a single code point, possibly itself. michael@0: // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. michael@0: if (fFoldLength < 0) { michael@0: fFoldLength = ~fFoldLength; michael@0: } michael@0: foldedC = (UChar32)fFoldLength; michael@0: fFoldChars = NULL; michael@0: return foldedC; michael@0: } michael@0: // String foldings fall through here. michael@0: fFoldIndex = 0; michael@0: } michael@0: michael@0: U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC); michael@0: if (fFoldIndex >= fFoldLength) { michael@0: fFoldChars = NULL; michael@0: } michael@0: return foldedC; michael@0: } michael@0: michael@0: michael@0: UBool CaseFoldingUTextIterator::inExpansion() { michael@0: return fFoldChars != NULL; michael@0: } michael@0: michael@0: michael@0: michael@0: CaseFoldingUCharIterator::CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit) : michael@0: fChars(chars), fIndex(start), fLimit(limit), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { michael@0: fcsp = ucase_getSingleton(); michael@0: } michael@0: michael@0: michael@0: CaseFoldingUCharIterator::~CaseFoldingUCharIterator() {} michael@0: michael@0: michael@0: UChar32 CaseFoldingUCharIterator::next() { michael@0: UChar32 foldedC; michael@0: UChar32 originalC; michael@0: if (fFoldChars == NULL) { michael@0: // We are not in a string folding of an earlier character. michael@0: // Start handling the next char from the input UText. michael@0: if (fIndex >= fLimit) { michael@0: return U_SENTINEL; michael@0: } michael@0: U16_NEXT(fChars, fIndex, fLimit, originalC); michael@0: michael@0: fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); michael@0: if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { michael@0: // input code point folds to a single code point, possibly itself. michael@0: // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. michael@0: if (fFoldLength < 0) { michael@0: fFoldLength = ~fFoldLength; michael@0: } michael@0: foldedC = (UChar32)fFoldLength; michael@0: fFoldChars = NULL; michael@0: return foldedC; michael@0: } michael@0: // String foldings fall through here. michael@0: fFoldIndex = 0; michael@0: } michael@0: michael@0: U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC); michael@0: if (fFoldIndex >= fFoldLength) { michael@0: fFoldChars = NULL; michael@0: } michael@0: return foldedC; michael@0: } michael@0: michael@0: michael@0: UBool CaseFoldingUCharIterator::inExpansion() { michael@0: return fFoldChars != NULL; michael@0: } michael@0: michael@0: int64_t CaseFoldingUCharIterator::getIndex() { michael@0: return fIndex; michael@0: } michael@0: michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif michael@0: