|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "nsSemanticUnitScanner.h" |
|
7 |
|
// XPCOM boilerplate. NOTE(review): NS_IMPL_ISUPPORTS is expected to generate the
// nsISupports plumbing (AddRef/Release/QueryInterface) for nsSemanticUnitScanner
// with nsISemanticUnitScanner as its only advertised interface -- confirm
// against the macro definition.
8 NS_IMPL_ISUPPORTS(nsSemanticUnitScanner, nsISemanticUnitScanner) |
|
9 |
|
10 nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker() |
|
11 { |
|
12 /* member initializers and constructor code */ |
|
13 } |
|
14 |
|
15 nsSemanticUnitScanner::~nsSemanticUnitScanner() |
|
16 { |
|
17 /* destructor code */ |
|
18 } |
|
19 |
|
20 |
|
21 /* void start (in string characterSet); */ |
|
22 NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet) |
|
23 { |
|
24 // do nothing for now. |
|
25 return NS_OK; |
|
26 } |
|
27 |
|
28 /* void next (in wstring text, in long length, in long pos, out boolean hasMoreUnits, out long begin, out long end); */ |
|
29 NS_IMETHODIMP nsSemanticUnitScanner::Next(const char16_t *text, int32_t length, int32_t pos, bool isLastBuffer, int32_t *begin, int32_t *end, bool *_retval) |
|
30 { |
|
31 // xxx need to bullet proff and check input pointer |
|
32 // make sure begin, end and _retval is not nullptr here |
|
33 |
|
34 // if we reach the end, just return |
|
35 if (pos >= length) { |
|
36 *begin = pos; |
|
37 *end = pos; |
|
38 *_retval = false; |
|
39 return NS_OK; |
|
40 } |
|
41 |
|
42 uint8_t char_class = nsSampleWordBreaker::GetClass(text[pos]); |
|
43 |
|
44 // if we are in chinese mode, return one han letter at a time |
|
45 // we should not do this if we are in Japanese or Korean mode |
|
46 if (kWbClassHanLetter == char_class) { |
|
47 *begin = pos; |
|
48 *end = pos+1; |
|
49 *_retval = true; |
|
50 return NS_OK; |
|
51 } |
|
52 |
|
53 int32_t next; |
|
54 // find the next "word" |
|
55 next = NextWord(text, (uint32_t) length, (uint32_t) pos); |
|
56 |
|
57 // if we don't have enough text to make decision, return |
|
58 if (next == NS_WORDBREAKER_NEED_MORE_TEXT) { |
|
59 *begin = pos; |
|
60 *end = isLastBuffer ? length : pos; |
|
61 *_retval = isLastBuffer; |
|
62 return NS_OK; |
|
63 } |
|
64 |
|
65 // if what we got is space or punct, look at the next break |
|
66 if ((char_class == kWbClassSpace) || (char_class == kWbClassPunct)) { |
|
67 // if the next "word" is not letters, |
|
68 // call itself recursively with the new pos |
|
69 return Next(text, length, next, isLastBuffer, begin, end, _retval); |
|
70 } |
|
71 |
|
72 // for the rest, return |
|
73 *begin = pos; |
|
74 *end = next; |
|
75 *_retval = true; |
|
76 return NS_OK; |
|
77 } |
|
78 |