intl/lwbrk/idl/nsISemanticUnitScanner.idl

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/lwbrk/idl/nsISemanticUnitScanner.idl	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,48 @@
     1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "nsISupports.idl"
    1.10 +
    1.11 +%{C++
    1.12 +// Class ID {ADF42751-1CEF-4ad2-AA8E-BCB849D8D31F}; the #define below spells out the same value as a struct initializer.
    1.13 +#define NS_SEMANTICUNITSCANNER_CID { 0xadf42751, 0x1cef, 0x4ad2, { 0xaa, 0x8e, 0xbc, 0xb8, 0x49, 0xd8, 0xd3, 0x1f}}
    1.14 +#define NS_SEMANTICUNITSCANNER_CONTRACTID "@mozilla.org/intl/semanticunitscanner;1"
    1.15 +%}
    1.16 +
    1.17 +/**
    1.18 + * Provides a language-independent way to break Unicode
    1.19 + * text into meaningful semantic units (e.g. words).
    1.20 + */
    1.21 +[scriptable, uuid(9f620be4-e535-11d6-b254-00039310a47a)]
    1.22 +interface nsISemanticUnitScanner : nsISupports {
    1.23 +    /**
    1.24 +     * start()
    1.25 +     *
    1.26 +     * Starts up the semantic unit scanner with an optional
    1.27 +     * character set, which acts as a hint to optimize the heuristics
    1.28 +     * used to determine the language(s) of the processed text.
    1.29 +     *
    1.30 +     * @param characterSet the character set the text was originally
    1.31 +     *                     encoded in (can be NULL)
    1.32 +     */
    1.33 +    void start(in string characterSet);
    1.34 +
    1.35 +    /**
    1.36 +     * next()
    1.37 +     * Gets the begin / end offsets of the next unit in the current text.
    1.38 +     *
    1.39 +     * @param text the text to be scanned
    1.40 +     * @param length the number of characters in the text to be processed
    1.41 +     * @param pos the current position
    1.42 +     * @param isLastBuffer whether this buffer is the last one
    1.43 +     * @param begin the begin offset of the next unit
    1.44 +     * @param end the end offset of the next unit
    1.45 +     * @return whether the current text has more units
    1.46 +     */
    1.47 +    boolean next(in wstring text, in long length, in long pos, 
    1.48 +              in boolean isLastBuffer,
    1.49 +              out long begin, out long end );
    1.50 +
    1.51 +};

mercurial