intl/lwbrk/src/nsSemanticUnitScanner.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned from the upstream tor-browser origin at tag tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "nsSemanticUnitScanner.h"
michael@0 7
michael@0 8 NS_IMPL_ISUPPORTS(nsSemanticUnitScanner, nsISemanticUnitScanner)
michael@0 9
michael@0 10 nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker()
michael@0 11 {
michael@0 12 /* member initializers and constructor code */
michael@0 13 }
michael@0 14
michael@0 15 nsSemanticUnitScanner::~nsSemanticUnitScanner()
michael@0 16 {
michael@0 17 /* destructor code */
michael@0 18 }
michael@0 19
michael@0 20
michael@0 21 /* void start (in string characterSet); */
michael@0 22 NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet)
michael@0 23 {
michael@0 24 // do nothing for now.
michael@0 25 return NS_OK;
michael@0 26 }
michael@0 27
michael@0 28 /* void next (in wstring text, in long length, in long pos, out boolean hasMoreUnits, out long begin, out long end); */
michael@0 29 NS_IMETHODIMP nsSemanticUnitScanner::Next(const char16_t *text, int32_t length, int32_t pos, bool isLastBuffer, int32_t *begin, int32_t *end, bool *_retval)
michael@0 30 {
michael@0 31 // xxx need to bullet proff and check input pointer
michael@0 32 // make sure begin, end and _retval is not nullptr here
michael@0 33
michael@0 34 // if we reach the end, just return
michael@0 35 if (pos >= length) {
michael@0 36 *begin = pos;
michael@0 37 *end = pos;
michael@0 38 *_retval = false;
michael@0 39 return NS_OK;
michael@0 40 }
michael@0 41
michael@0 42 uint8_t char_class = nsSampleWordBreaker::GetClass(text[pos]);
michael@0 43
michael@0 44 // if we are in chinese mode, return one han letter at a time
michael@0 45 // we should not do this if we are in Japanese or Korean mode
michael@0 46 if (kWbClassHanLetter == char_class) {
michael@0 47 *begin = pos;
michael@0 48 *end = pos+1;
michael@0 49 *_retval = true;
michael@0 50 return NS_OK;
michael@0 51 }
michael@0 52
michael@0 53 int32_t next;
michael@0 54 // find the next "word"
michael@0 55 next = NextWord(text, (uint32_t) length, (uint32_t) pos);
michael@0 56
michael@0 57 // if we don't have enough text to make decision, return
michael@0 58 if (next == NS_WORDBREAKER_NEED_MORE_TEXT) {
michael@0 59 *begin = pos;
michael@0 60 *end = isLastBuffer ? length : pos;
michael@0 61 *_retval = isLastBuffer;
michael@0 62 return NS_OK;
michael@0 63 }
michael@0 64
michael@0 65 // if what we got is space or punct, look at the next break
michael@0 66 if ((char_class == kWbClassSpace) || (char_class == kWbClassPunct)) {
michael@0 67 // if the next "word" is not letters,
michael@0 68 // call itself recursively with the new pos
michael@0 69 return Next(text, length, next, isLastBuffer, begin, end, _retval);
michael@0 70 }
michael@0 71
michael@0 72 // for the rest, return
michael@0 73 *begin = pos;
michael@0 74 *end = next;
michael@0 75 *_retval = true;
michael@0 76 return NS_OK;
michael@0 77 }
michael@0 78

mercurial