diff -r 000000000000 -r 6474c204b198 xpcom/ds/nsCharSeparatedTokenizer.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xpcom/ds/nsCharSeparatedTokenizer.h Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,314 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __nsCharSeparatedTokenizer_h +#define __nsCharSeparatedTokenizer_h + +#include "mozilla/RangedPtr.h" + +#include "nsDependentSubstring.h" +#include "nsCRT.h" + +/** + * This parses a SeparatorChar-separated string into tokens. + * Whitespace surrounding tokens is not treated as part of tokens, however + * whitespace inside a token is. If the final token is the empty string, it is + * not returned. + * + * Some examples, with SeparatorChar = ',': + * + * "foo, bar, baz" -> "foo" "bar" "baz" + * "foo,bar,baz" -> "foo" "bar" "baz" + * "foo , bar hi , baz" -> "foo" "bar hi" "baz" + * "foo, ,bar,baz" -> "foo" "" "bar" "baz" + * "foo,,bar,baz" -> "foo" "" "bar" "baz" + * "foo,bar,baz," -> "foo" "bar" "baz" + * + * The function used for whitespace detection is a template argument. + * By default, it is NS_IsAsciiWhitespace. + */ +template +class nsCharSeparatedTokenizerTemplate +{ +public: + // Flags -- only one for now. If we need more, they should be defined to + // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.) + enum { + SEPARATOR_OPTIONAL = 1 + }; + + nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource, + char16_t aSeparatorChar, + uint32_t aFlags = 0) + : mIter(aSource.Data(), aSource.Length()), + mEnd(aSource.Data() + aSource.Length(), aSource.Data(), + aSource.Length()), + mSeparatorChar(aSeparatorChar), + mWhitespaceBeforeFirstToken(false), + mWhitespaceAfterCurrentToken(false), + mSeparatorAfterCurrentToken(false), + mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL) + { + // Skip initial whitespace + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceBeforeFirstToken = true; + ++mIter; + } + } + + /** + * Checks if any more tokens are available. + */ + bool hasMoreTokens() const + { + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), + "Should be at beginning of token if there is one"); + + return mIter < mEnd; + } + + /* + * Returns true if there is whitespace prior to the first token. + */ + bool whitespaceBeforeFirstToken() const + { + return mWhitespaceBeforeFirstToken; + } + + /* + * Returns true if there is a separator after the current token. + * Useful if you want to check whether the last token has a separator + * after it which may not be valid. + */ + bool separatorAfterCurrentToken() const + { + return mSeparatorAfterCurrentToken; + } + + /* + * Returns true if there is any whitespace after the current token. + */ + bool whitespaceAfterCurrentToken() const + { + return mWhitespaceAfterCurrentToken; + } + + /** + * Returns the next token. + */ + const nsDependentSubstring nextToken() + { + mozilla::RangedPtr tokenStart = mIter, tokenEnd = mIter; + + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), + "Should be at beginning of token if there is one"); + + // Search until we hit separator or end (or whitespace, if a separator + // isn't required -- see clause with 'break' below). + while (mIter < mEnd && *mIter != mSeparatorChar) { + // Skip to end of the current word. + while (mIter < mEnd && + !IsWhitespace(*mIter) && *mIter != mSeparatorChar) { + ++mIter; + } + tokenEnd = mIter; + + // Skip whitespace after the current word. + mWhitespaceAfterCurrentToken = false; + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceAfterCurrentToken = true; + ++mIter; + } + if (mSeparatorOptional) { + // We've hit (and skipped) whitespace, and that's sufficient to end + // our token, regardless of whether we've reached a SeparatorChar. + break; + } // (else, we'll keep looping until we hit mEnd or SeparatorChar) + } + + mSeparatorAfterCurrentToken = (mIter != mEnd && + *mIter == mSeparatorChar); + MOZ_ASSERT(mSeparatorOptional || + (mSeparatorAfterCurrentToken == (mIter < mEnd)), + "If we require a separator and haven't hit the end of " + "our string, then we shouldn't have left the loop " + "unless we hit a separator"); + + // Skip separator (and any whitespace after it), if we're at one. + if (mSeparatorAfterCurrentToken) { + ++mIter; + + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceAfterCurrentToken = true; + ++mIter; + } + } + + return Substring(tokenStart.get(), tokenEnd.get()); + } + +private: + mozilla::RangedPtr mIter; + const mozilla::RangedPtr mEnd; + char16_t mSeparatorChar; + bool mWhitespaceBeforeFirstToken; + bool mWhitespaceAfterCurrentToken; + bool mSeparatorAfterCurrentToken; + bool mSeparatorOptional; +}; + +class nsCharSeparatedTokenizer: public nsCharSeparatedTokenizerTemplate<> +{ +public: + nsCharSeparatedTokenizer(const nsSubstring& aSource, + char16_t aSeparatorChar, + uint32_t aFlags = 0) + : nsCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags) + { + } +}; + +template +class nsCCharSeparatedTokenizerTemplate +{ +public: + // Flags -- only one for now. If we need more, they should be defined to + // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.) + enum { + SEPARATOR_OPTIONAL = 1 + }; + + nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource, + char aSeparatorChar, + uint32_t aFlags = 0) + : mIter(aSource.Data(), aSource.Length()), + mEnd(aSource.Data() + aSource.Length(), aSource.Data(), + aSource.Length()), + mSeparatorChar(aSeparatorChar), + mWhitespaceBeforeFirstToken(false), + mWhitespaceAfterCurrentToken(false), + mSeparatorAfterCurrentToken(false), + mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL) + { + // Skip initial whitespace + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceBeforeFirstToken = true; + ++mIter; + } + } + + /** + * Checks if any more tokens are available. + */ + bool hasMoreTokens() const + { + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), + "Should be at beginning of token if there is one"); + + return mIter < mEnd; + } + + /* + * Returns true if there is whitespace prior to the first token. + */ + bool whitespaceBeforeFirstToken() const + { + return mWhitespaceBeforeFirstToken; + } + + /* + * Returns true if there is a separator after the current token. + * Useful if you want to check whether the last token has a separator + * after it which may not be valid. + */ + bool separatorAfterCurrentToken() const + { + return mSeparatorAfterCurrentToken; + } + + /* + * Returns true if there is any whitespace after the current token. + */ + bool whitespaceAfterCurrentToken() const + { + return mWhitespaceAfterCurrentToken; + } + + /** + * Returns the next token. + */ + const nsDependentCSubstring nextToken() + { + mozilla::RangedPtr tokenStart = mIter, tokenEnd = mIter; + + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), + "Should be at beginning of token if there is one"); + + // Search until we hit separator or end (or whitespace, if a separator + // isn't required -- see clause with 'break' below). + while (mIter < mEnd && *mIter != mSeparatorChar) { + // Skip to end of the current word. + while (mIter < mEnd && + !IsWhitespace(*mIter) && *mIter != mSeparatorChar) { + ++mIter; + } + tokenEnd = mIter; + + // Skip whitespace after the current word. + mWhitespaceAfterCurrentToken = false; + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceAfterCurrentToken = true; + ++mIter; + } + if (mSeparatorOptional) { + // We've hit (and skipped) whitespace, and that's sufficient to end + // our token, regardless of whether we've reached a SeparatorChar. + break; + } // (else, we'll keep looping until we hit mEnd or SeparatorChar) + } + + mSeparatorAfterCurrentToken = (mIter != mEnd && + *mIter == mSeparatorChar); + MOZ_ASSERT(mSeparatorOptional || + (mSeparatorAfterCurrentToken == (mIter < mEnd)), + "If we require a separator and haven't hit the end of " + "our string, then we shouldn't have left the loop " + "unless we hit a separator"); + + // Skip separator (and any whitespace after it), if we're at one. + if (mSeparatorAfterCurrentToken) { + ++mIter; + + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceAfterCurrentToken = true; + ++mIter; + } + } + + return Substring(tokenStart.get(), tokenEnd.get()); + } + +private: + mozilla::RangedPtr mIter; + const mozilla::RangedPtr mEnd; + char mSeparatorChar; + bool mWhitespaceBeforeFirstToken; + bool mWhitespaceAfterCurrentToken; + bool mSeparatorAfterCurrentToken; + bool mSeparatorOptional; +}; + +class nsCCharSeparatedTokenizer: public nsCCharSeparatedTokenizerTemplate<> +{ +public: + nsCCharSeparatedTokenizer(const nsCSubstring& aSource, + char aSeparatorChar, + uint32_t aFlags = 0) + : nsCCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags) + { + } +}; + +#endif /* __nsCharSeparatedTokenizer_h */