intl/lwbrk/src/nsSemanticUnitScanner.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #include "nsSemanticUnitScanner.h"
     8 NS_IMPL_ISUPPORTS(nsSemanticUnitScanner, nsISemanticUnitScanner)
    10 nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker()
    11 {
    12   /* member initializers and constructor code */
    13 }
    15 nsSemanticUnitScanner::~nsSemanticUnitScanner()
    16 {
    17   /* destructor code */
    18 }
    21 /* void start (in string characterSet); */
    22 NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet)
    23 {
    24     // do nothing for now.
    25     return NS_OK;
    26 }
    28 /* void next (in wstring text, in long length, in long pos, out boolean hasMoreUnits, out long begin, out long end); */
    29 NS_IMETHODIMP nsSemanticUnitScanner::Next(const char16_t *text, int32_t length, int32_t pos, bool isLastBuffer, int32_t *begin, int32_t *end, bool *_retval)
    30 {
    31     // xxx need to bullet proff and check input pointer 
    32     //  make sure begin, end and _retval is not nullptr here
    34     // if we reach the end, just return
    35     if (pos >= length) {
    36        *begin = pos;
    37        *end = pos;
    38        *_retval = false;
    39        return NS_OK;
    40     }
    42     uint8_t char_class = nsSampleWordBreaker::GetClass(text[pos]);
    44     // if we are in chinese mode, return one han letter at a time
    45     // we should not do this if we are in Japanese or Korean mode
    46     if (kWbClassHanLetter == char_class) {
    47        *begin = pos;
    48        *end = pos+1;
    49        *_retval = true;
    50        return NS_OK;
    51     }
    53     int32_t next;
    54     // find the next "word"
    55     next = NextWord(text, (uint32_t) length, (uint32_t) pos);
    57     // if we don't have enough text to make decision, return 
    58     if (next == NS_WORDBREAKER_NEED_MORE_TEXT) {
    59        *begin = pos;
    60        *end = isLastBuffer ? length : pos;
    61        *_retval = isLastBuffer;
    62        return NS_OK;
    63     } 
    65     // if what we got is space or punct, look at the next break
    66     if ((char_class == kWbClassSpace) || (char_class == kWbClassPunct)) {
    67         // if the next "word" is not letters, 
    68         // call itself recursively with the new pos
    69         return Next(text, length, next, isLastBuffer, begin, end, _retval);
    70     }
    72     // for the rest, return 
    73     *begin = pos;
    74     *end = next;
    75     *_retval = true;
    76     return NS_OK;
    77 }

mercurial