michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsSemanticUnitScanner.h" michael@0: michael@0: NS_IMPL_ISUPPORTS(nsSemanticUnitScanner, nsISemanticUnitScanner) michael@0: michael@0: nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker() michael@0: { michael@0: /* member initializers and constructor code */ michael@0: } michael@0: michael@0: nsSemanticUnitScanner::~nsSemanticUnitScanner() michael@0: { michael@0: /* destructor code */ michael@0: } michael@0: michael@0: michael@0: /* void start (in string characterSet); */ michael@0: NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet) michael@0: { michael@0: // do nothing for now. michael@0: return NS_OK; michael@0: } michael@0: michael@0: /* void next (in wstring text, in long length, in long pos, out boolean hasMoreUnits, out long begin, out long end); */ michael@0: NS_IMETHODIMP nsSemanticUnitScanner::Next(const char16_t *text, int32_t length, int32_t pos, bool isLastBuffer, int32_t *begin, int32_t *end, bool *_retval) michael@0: { michael@0: // xxx need to bullet proff and check input pointer michael@0: // make sure begin, end and _retval is not nullptr here michael@0: michael@0: // if we reach the end, just return michael@0: if (pos >= length) { michael@0: *begin = pos; michael@0: *end = pos; michael@0: *_retval = false; michael@0: return NS_OK; michael@0: } michael@0: michael@0: uint8_t char_class = nsSampleWordBreaker::GetClass(text[pos]); michael@0: michael@0: // if we are in chinese mode, return one han letter at a time michael@0: // we should not do this if we are in Japanese or Korean mode michael@0: if (kWbClassHanLetter == char_class) { michael@0: *begin = pos; michael@0: *end = pos+1; michael@0: *_retval = true; michael@0: return NS_OK; michael@0: } michael@0: michael@0: int32_t next; michael@0: // find the next "word" michael@0: next = NextWord(text, (uint32_t) length, (uint32_t) pos); michael@0: michael@0: // if we don't have enough text to make decision, return michael@0: if (next == NS_WORDBREAKER_NEED_MORE_TEXT) { michael@0: *begin = pos; michael@0: *end = isLastBuffer ? length : pos; michael@0: *_retval = isLastBuffer; michael@0: return NS_OK; michael@0: } michael@0: michael@0: // if what we got is space or punct, look at the next break michael@0: if ((char_class == kWbClassSpace) || (char_class == kWbClassPunct)) { michael@0: // if the next "word" is not letters, michael@0: // call itself recursively with the new pos michael@0: return Next(text, length, next, isLastBuffer, begin, end, _retval); michael@0: } michael@0: michael@0: // for the rest, return michael@0: *begin = pos; michael@0: *end = next; michael@0: *_retval = true; michael@0: return NS_OK; michael@0: } michael@0: