1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/lwbrk/src/nsSemanticUnitScanner.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,78 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "nsSemanticUnitScanner.h" 1.10 + 1.11 +NS_IMPL_ISUPPORTS(nsSemanticUnitScanner, nsISemanticUnitScanner) 1.12 + 1.13 +nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker() 1.14 +{ 1.15 + /* member initializers and constructor code */ 1.16 +} 1.17 + 1.18 +nsSemanticUnitScanner::~nsSemanticUnitScanner() 1.19 +{ 1.20 + /* destructor code */ 1.21 +} 1.22 + 1.23 + 1.24 +/* void start (in string characterSet); */ 1.25 +NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet) 1.26 +{ 1.27 + // do nothing for now. 1.28 + return NS_OK; 1.29 +} 1.30 + 1.31 +/* void next (in wstring text, in long length, in long pos, out boolean hasMoreUnits, out long begin, out long end); */ 1.32 +NS_IMETHODIMP nsSemanticUnitScanner::Next(const char16_t *text, int32_t length, int32_t pos, bool isLastBuffer, int32_t *begin, int32_t *end, bool *_retval) 1.33 +{ 1.34 + // xxx need to bullet proff and check input pointer 1.35 + // make sure begin, end and _retval is not nullptr here 1.36 + 1.37 + // if we reach the end, just return 1.38 + if (pos >= length) { 1.39 + *begin = pos; 1.40 + *end = pos; 1.41 + *_retval = false; 1.42 + return NS_OK; 1.43 + } 1.44 + 1.45 + uint8_t char_class = nsSampleWordBreaker::GetClass(text[pos]); 1.46 + 1.47 + // if we are in chinese mode, return one han letter at a time 1.48 + // we should not do this if we are in Japanese or Korean mode 1.49 + if (kWbClassHanLetter == char_class) { 1.50 + *begin = pos; 1.51 + *end = pos+1; 1.52 + *_retval = true; 1.53 + return NS_OK; 1.54 + } 1.55 + 1.56 + int32_t next; 1.57 + // find the next "word" 1.58 + next = NextWord(text, (uint32_t) length, (uint32_t) pos); 1.59 + 1.60 + // if we don't have enough text to make decision, return 1.61 + if (next == NS_WORDBREAKER_NEED_MORE_TEXT) { 1.62 + *begin = pos; 1.63 + *end = isLastBuffer ? length : pos; 1.64 + *_retval = isLastBuffer; 1.65 + return NS_OK; 1.66 + } 1.67 + 1.68 + // if what we got is space or punct, look at the next break 1.69 + if ((char_class == kWbClassSpace) || (char_class == kWbClassPunct)) { 1.70 + // if the next "word" is not letters, 1.71 + // call itself recursively with the new pos 1.72 + return Next(text, length, next, isLastBuffer, begin, end, _retval); 1.73 + } 1.74 + 1.75 + // for the rest, return 1.76 + *begin = pos; 1.77 + *end = next; 1.78 + *_retval = true; 1.79 + return NS_OK; 1.80 +} 1.81 +