The Tor Browser: xpcom/ds/nsCharSeparatedTokenizer.h@6474c204b198

Cloned from the upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

     2 /* This Source Code Form is subject to the terms of the Mozilla Public

     3  * License, v. 2.0. If a copy of the MPL was not distributed with this

     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     6 #ifndef __nsCharSeparatedTokenizer_h

     7 #define __nsCharSeparatedTokenizer_h

     9 #include "mozilla/RangedPtr.h"

    11 #include "nsDependentSubstring.h"

    12 #include "nsCRT.h"

    14 /**

    15  * This parses a SeparatorChar-separated string into tokens.

    16  * Whitespace surrounding tokens is not treated as part of tokens, however

    17  * whitespace inside a token is. If the final token is the empty string, it is

    18  * not returned.

    19  *

    20  * Some examples, with SeparatorChar = ',':

    21  *

    22  * "foo, bar, baz" ->      "foo" "bar" "baz"

    23  * "foo,bar,baz" ->        "foo" "bar" "baz"

    24  * "foo , bar hi , baz" -> "foo" "bar hi" "baz"

    25  * "foo, ,bar,baz" ->      "foo" "" "bar" "baz"

    26  * "foo,,bar,baz" ->       "foo" "" "bar" "baz"

    27  * "foo,bar,baz," ->       "foo" "bar" "baz"

    28  *

    29  * The function used for whitespace detection is a template argument.

    30  * By default, it is NS_IsAsciiWhitespace.

    31  */

    32 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>

    33 class nsCharSeparatedTokenizerTemplate

    34 {

    35 public:

    36     // Flags -- only one for now. If we need more, they should be defined to

    37     // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)

    38     enum {

    39         SEPARATOR_OPTIONAL = 1

    40     };

    42     nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource,

    43                                      char16_t aSeparatorChar,

    44                                      uint32_t  aFlags = 0)

    45         : mIter(aSource.Data(), aSource.Length()),

    46           mEnd(aSource.Data() + aSource.Length(), aSource.Data(),

    47                aSource.Length()),

    48           mSeparatorChar(aSeparatorChar),

    49           mWhitespaceBeforeFirstToken(false),

    50           mWhitespaceAfterCurrentToken(false),

    51           mSeparatorAfterCurrentToken(false),

    52           mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)

    53     {

    54         // Skip initial whitespace

    55         while (mIter < mEnd && IsWhitespace(*mIter)) {

    56             mWhitespaceBeforeFirstToken = true;

    57             ++mIter;

    58         }

    59     }

    61     /**

    62      * Checks if any more tokens are available.

    63      */

    64     bool hasMoreTokens() const

    65     {

    66         MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),

    67                    "Should be at beginning of token if there is one");

    69         return mIter < mEnd;

    70     }

    72     /*

    73      * Returns true if there is whitespace prior to the first token.

    74      */

    75     bool whitespaceBeforeFirstToken() const

    76     {

    77         return mWhitespaceBeforeFirstToken;

    78     }

    80     /*

    81      * Returns true if there is a separator after the current token.

    82      * Useful if you want to check whether the last token has a separator

    83      * after it which may not be valid.

    84      */

    85     bool separatorAfterCurrentToken() const

    86     {

    87         return mSeparatorAfterCurrentToken;

    88     }

    90     /*

    91      * Returns true if there is any whitespace after the current token.

    92      */

    93     bool whitespaceAfterCurrentToken() const

    94     {

    95         return mWhitespaceAfterCurrentToken;

    96     }

    98     /**

    99      * Returns the next token.

   100      */

   101     const nsDependentSubstring nextToken()

   102     {

   103         mozilla::RangedPtr<const char16_t> tokenStart = mIter, tokenEnd = mIter;

   105         MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),

   106                    "Should be at beginning of token if there is one");

   108         // Search until we hit separator or end (or whitespace, if a separator

   109         // isn't required -- see clause with 'break' below).

   110         while (mIter < mEnd && *mIter != mSeparatorChar) {

   111           // Skip to end of the current word.

   112           while (mIter < mEnd &&

   113                  !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {

   114               ++mIter;

   115           }

   116           tokenEnd = mIter;

   118           // Skip whitespace after the current word.

   119           mWhitespaceAfterCurrentToken = false;

   120           while (mIter < mEnd && IsWhitespace(*mIter)) {

   121               mWhitespaceAfterCurrentToken = true;

   122               ++mIter;

   123           }

   124           if (mSeparatorOptional) {

   125             // We've hit (and skipped) whitespace, and that's sufficient to end

   126             // our token, regardless of whether we've reached a SeparatorChar.

   127             break;

   128           } // (else, we'll keep looping until we hit mEnd or SeparatorChar)

   129         }

   131         mSeparatorAfterCurrentToken = (mIter != mEnd &&

   132                                        *mIter == mSeparatorChar);

   133         MOZ_ASSERT(mSeparatorOptional ||

   134                    (mSeparatorAfterCurrentToken == (mIter < mEnd)),

   135                    "If we require a separator and haven't hit the end of "

   136                    "our string, then we shouldn't have left the loop "

   137                    "unless we hit a separator");

   139         // Skip separator (and any whitespace after it), if we're at one.

   140         if (mSeparatorAfterCurrentToken) {

   141             ++mIter;

   143             while (mIter < mEnd && IsWhitespace(*mIter)) {

   144                 mWhitespaceAfterCurrentToken = true;

   145                 ++mIter;

   146             }

   147         }

   149         return Substring(tokenStart.get(), tokenEnd.get());

   150     }

   152 private:

   153     mozilla::RangedPtr<const char16_t> mIter;

   154     const mozilla::RangedPtr<const char16_t> mEnd;

   155     char16_t mSeparatorChar;

   156     bool mWhitespaceBeforeFirstToken;

   157     bool mWhitespaceAfterCurrentToken;

   158     bool mSeparatorAfterCurrentToken;

   159     bool mSeparatorOptional;

   160 };

   162 class nsCharSeparatedTokenizer: public nsCharSeparatedTokenizerTemplate<>

   163 {

   164 public:

   165     nsCharSeparatedTokenizer(const nsSubstring& aSource,

   166                              char16_t aSeparatorChar,

   167                              uint32_t  aFlags = 0)

   168       : nsCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags)

   169     {

   170     }

   171 };

   173 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>

   174 class nsCCharSeparatedTokenizerTemplate

   175 {

   176 public:

   177     // Flags -- only one for now. If we need more, they should be defined to

   178     // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)

   179     enum {

   180         SEPARATOR_OPTIONAL = 1

   181     };

   183     nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource,

   184                                       char aSeparatorChar,

   185                                       uint32_t  aFlags = 0)

   186         : mIter(aSource.Data(), aSource.Length()),

   187           mEnd(aSource.Data() + aSource.Length(), aSource.Data(),

   188                aSource.Length()),

   189           mSeparatorChar(aSeparatorChar),

   190           mWhitespaceBeforeFirstToken(false),

   191           mWhitespaceAfterCurrentToken(false),

   192           mSeparatorAfterCurrentToken(false),

   193           mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)

   194     {

   195         // Skip initial whitespace

   196         while (mIter < mEnd && IsWhitespace(*mIter)) {

   197             mWhitespaceBeforeFirstToken = true;

   198             ++mIter;

   199         }

   200     }

   202     /**

   203      * Checks if any more tokens are available.

   204      */

   205     bool hasMoreTokens() const

   206     {

   207         MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),

   208                    "Should be at beginning of token if there is one");

   210         return mIter < mEnd;

   211     }

   213     /*

   214      * Returns true if there is whitespace prior to the first token.

   215      */

   216     bool whitespaceBeforeFirstToken() const

   217     {

   218         return mWhitespaceBeforeFirstToken;

   219     }

   221     /*

   222      * Returns true if there is a separator after the current token.

   223      * Useful if you want to check whether the last token has a separator

   224      * after it which may not be valid.

   225      */

   226     bool separatorAfterCurrentToken() const

   227     {

   228         return mSeparatorAfterCurrentToken;

   229     }

   231     /*

   232      * Returns true if there is any whitespace after the current token.

   233      */

   234     bool whitespaceAfterCurrentToken() const

   235     {

   236         return mWhitespaceAfterCurrentToken;

   237     }

   239     /**

   240      * Returns the next token.

   241      */

   242     const nsDependentCSubstring nextToken()

   243     {

   244         mozilla::RangedPtr<const char> tokenStart = mIter, tokenEnd = mIter;

   246         MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),

   247                    "Should be at beginning of token if there is one");

   249         // Search until we hit separator or end (or whitespace, if a separator

   250         // isn't required -- see clause with 'break' below).

   251         while (mIter < mEnd && *mIter != mSeparatorChar) {

   252           // Skip to end of the current word.

   253           while (mIter < mEnd &&

   254                  !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {

   255               ++mIter;

   256           }

   257           tokenEnd = mIter;

   259           // Skip whitespace after the current word.

   260           mWhitespaceAfterCurrentToken = false;

   261           while (mIter < mEnd && IsWhitespace(*mIter)) {

   262               mWhitespaceAfterCurrentToken = true;

   263               ++mIter;

   264           }

   265           if (mSeparatorOptional) {

   266             // We've hit (and skipped) whitespace, and that's sufficient to end

   267             // our token, regardless of whether we've reached a SeparatorChar.

   268             break;

   269           } // (else, we'll keep looping until we hit mEnd or SeparatorChar)

   270         }

   272         mSeparatorAfterCurrentToken = (mIter != mEnd &&

   273                                        *mIter == mSeparatorChar);

   274         MOZ_ASSERT(mSeparatorOptional ||

   275                    (mSeparatorAfterCurrentToken == (mIter < mEnd)),

   276                    "If we require a separator and haven't hit the end of "

   277                    "our string, then we shouldn't have left the loop "

   278                    "unless we hit a separator");

   280         // Skip separator (and any whitespace after it), if we're at one.

   281         if (mSeparatorAfterCurrentToken) {

   282             ++mIter;

   284             while (mIter < mEnd && IsWhitespace(*mIter)) {

   285                 mWhitespaceAfterCurrentToken = true;

   286                 ++mIter;

   287             }

   288         }

   290         return Substring(tokenStart.get(), tokenEnd.get());

   291     }

   293 private:

   294     mozilla::RangedPtr<const char> mIter;

   295     const mozilla::RangedPtr<const char> mEnd;

   296     char mSeparatorChar;

   297     bool mWhitespaceBeforeFirstToken;

   298     bool mWhitespaceAfterCurrentToken;

   299     bool mSeparatorAfterCurrentToken;

   300     bool mSeparatorOptional;

   301 };

   303 class nsCCharSeparatedTokenizer: public nsCCharSeparatedTokenizerTemplate<>

   304 {

   305 public:

   306     nsCCharSeparatedTokenizer(const nsCSubstring& aSource,

   307                               char aSeparatorChar,

   308                               uint32_t aFlags = 0)

   309       : nsCCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags)

   310     {

   311     }

   312 };

   314 #endif /* __nsCharSeparatedTokenizer_h */

The Tor Browser / file revision

xpcom/ds/nsCharSeparatedTokenizer.h@6474c204b198

xpcom/ds/nsCharSeparatedTokenizer.h