intl/lwbrk/src/nsSemanticUnitScanner.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/lwbrk/src/nsSemanticUnitScanner.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,78 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "nsSemanticUnitScanner.h"
    1.10 +
// XPCOM boilerplate: declares that nsSemanticUnitScanner implements the
// nsISemanticUnitScanner interface. NOTE(review): the macro is defined outside
// this file; it presumably expands to the AddRef/Release/QueryInterface
// definitions for the class -- confirm against nsISupportsImpl.h.
NS_IMPL_ISUPPORTS(nsSemanticUnitScanner, nsISemanticUnitScanner)
    1.12 +
    1.13 +nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker()
    1.14 +{
    1.15 +  /* member initializers and constructor code */
    1.16 +}
    1.17 +
    1.18 +nsSemanticUnitScanner::~nsSemanticUnitScanner()
    1.19 +{
    1.20 +  /* destructor code */
    1.21 +}
    1.22 +
    1.23 +
    1.24 +/* void start (in string characterSet); */
    1.25 +NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet)
    1.26 +{
    1.27 +    // do nothing for now.
    1.28 +    return NS_OK;
    1.29 +}
    1.30 +
    1.31 +/* void next (in wstring text, in long length, in long pos, out boolean hasMoreUnits, out long begin, out long end); */
    1.32 +NS_IMETHODIMP nsSemanticUnitScanner::Next(const char16_t *text, int32_t length, int32_t pos, bool isLastBuffer, int32_t *begin, int32_t *end, bool *_retval)
    1.33 +{
    1.34 +    // xxx need to bullet proff and check input pointer 
    1.35 +    //  make sure begin, end and _retval is not nullptr here
    1.36 +
    1.37 +    // if we reach the end, just return
    1.38 +    if (pos >= length) {
    1.39 +       *begin = pos;
    1.40 +       *end = pos;
    1.41 +       *_retval = false;
    1.42 +       return NS_OK;
    1.43 +    }
    1.44 +
    1.45 +    uint8_t char_class = nsSampleWordBreaker::GetClass(text[pos]);
    1.46 +
    1.47 +    // if we are in chinese mode, return one han letter at a time
    1.48 +    // we should not do this if we are in Japanese or Korean mode
    1.49 +    if (kWbClassHanLetter == char_class) {
    1.50 +       *begin = pos;
    1.51 +       *end = pos+1;
    1.52 +       *_retval = true;
    1.53 +       return NS_OK;
    1.54 +    }
    1.55 +
    1.56 +    int32_t next;
    1.57 +    // find the next "word"
    1.58 +    next = NextWord(text, (uint32_t) length, (uint32_t) pos);
    1.59 +
    1.60 +    // if we don't have enough text to make decision, return 
    1.61 +    if (next == NS_WORDBREAKER_NEED_MORE_TEXT) {
    1.62 +       *begin = pos;
    1.63 +       *end = isLastBuffer ? length : pos;
    1.64 +       *_retval = isLastBuffer;
    1.65 +       return NS_OK;
    1.66 +    } 
    1.67 +    
    1.68 +    // if what we got is space or punct, look at the next break
    1.69 +    if ((char_class == kWbClassSpace) || (char_class == kWbClassPunct)) {
    1.70 +        // if the next "word" is not letters, 
    1.71 +        // call itself recursively with the new pos
    1.72 +        return Next(text, length, next, isLastBuffer, begin, end, _retval);
    1.73 +    }
    1.74 +
    1.75 +    // for the rest, return 
    1.76 +    *begin = pos;
    1.77 +    *end = next;
    1.78 +    *_retval = true;
    1.79 +    return NS_OK;
    1.80 +}
    1.81 +

mercurial