xpcom/ds/nsCharSeparatedTokenizer.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #ifndef __nsCharSeparatedTokenizer_h
michael@0 7 #define __nsCharSeparatedTokenizer_h
michael@0 8
michael@0 9 #include "mozilla/RangedPtr.h"
michael@0 10
michael@0 11 #include "nsDependentSubstring.h"
michael@0 12 #include "nsCRT.h"
michael@0 13
michael@0 14 /**
michael@0 15 * This parses a SeparatorChar-separated string into tokens.
michael@0 16 * Whitespace surrounding tokens is not treated as part of tokens, however
michael@0 17 * whitespace inside a token is. If the final token is the empty string, it is
michael@0 18 * not returned.
michael@0 19 *
michael@0 20 * Some examples, with SeparatorChar = ',':
michael@0 21 *
michael@0 22 * "foo, bar, baz" -> "foo" "bar" "baz"
michael@0 23 * "foo,bar,baz" -> "foo" "bar" "baz"
michael@0 24 * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
michael@0 25 * "foo, ,bar,baz" -> "foo" "" "bar" "baz"
michael@0 26 * "foo,,bar,baz" -> "foo" "" "bar" "baz"
michael@0 27 * "foo,bar,baz," -> "foo" "bar" "baz"
michael@0 28 *
michael@0 29 * The function used for whitespace detection is a template argument.
michael@0 30 * By default, it is NS_IsAsciiWhitespace.
michael@0 31 */
michael@0 32 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
michael@0 33 class nsCharSeparatedTokenizerTemplate
michael@0 34 {
michael@0 35 public:
michael@0 36 // Flags -- only one for now. If we need more, they should be defined to
michael@0 37 // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
michael@0 38 enum {
michael@0 39 SEPARATOR_OPTIONAL = 1
michael@0 40 };
michael@0 41
michael@0 42 nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource,
michael@0 43 char16_t aSeparatorChar,
michael@0 44 uint32_t aFlags = 0)
michael@0 45 : mIter(aSource.Data(), aSource.Length()),
michael@0 46 mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
michael@0 47 aSource.Length()),
michael@0 48 mSeparatorChar(aSeparatorChar),
michael@0 49 mWhitespaceBeforeFirstToken(false),
michael@0 50 mWhitespaceAfterCurrentToken(false),
michael@0 51 mSeparatorAfterCurrentToken(false),
michael@0 52 mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
michael@0 53 {
michael@0 54 // Skip initial whitespace
michael@0 55 while (mIter < mEnd && IsWhitespace(*mIter)) {
michael@0 56 mWhitespaceBeforeFirstToken = true;
michael@0 57 ++mIter;
michael@0 58 }
michael@0 59 }
michael@0 60
michael@0 61 /**
michael@0 62 * Checks if any more tokens are available.
michael@0 63 */
michael@0 64 bool hasMoreTokens() const
michael@0 65 {
michael@0 66 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
michael@0 67 "Should be at beginning of token if there is one");
michael@0 68
michael@0 69 return mIter < mEnd;
michael@0 70 }
michael@0 71
michael@0 72 /*
michael@0 73 * Returns true if there is whitespace prior to the first token.
michael@0 74 */
michael@0 75 bool whitespaceBeforeFirstToken() const
michael@0 76 {
michael@0 77 return mWhitespaceBeforeFirstToken;
michael@0 78 }
michael@0 79
michael@0 80 /*
michael@0 81 * Returns true if there is a separator after the current token.
michael@0 82 * Useful if you want to check whether the last token has a separator
michael@0 83 * after it which may not be valid.
michael@0 84 */
michael@0 85 bool separatorAfterCurrentToken() const
michael@0 86 {
michael@0 87 return mSeparatorAfterCurrentToken;
michael@0 88 }
michael@0 89
michael@0 90 /*
michael@0 91 * Returns true if there is any whitespace after the current token.
michael@0 92 */
michael@0 93 bool whitespaceAfterCurrentToken() const
michael@0 94 {
michael@0 95 return mWhitespaceAfterCurrentToken;
michael@0 96 }
michael@0 97
michael@0 98 /**
michael@0 99 * Returns the next token.
michael@0 100 */
michael@0 101 const nsDependentSubstring nextToken()
michael@0 102 {
michael@0 103 mozilla::RangedPtr<const char16_t> tokenStart = mIter, tokenEnd = mIter;
michael@0 104
michael@0 105 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
michael@0 106 "Should be at beginning of token if there is one");
michael@0 107
michael@0 108 // Search until we hit separator or end (or whitespace, if a separator
michael@0 109 // isn't required -- see clause with 'break' below).
michael@0 110 while (mIter < mEnd && *mIter != mSeparatorChar) {
michael@0 111 // Skip to end of the current word.
michael@0 112 while (mIter < mEnd &&
michael@0 113 !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
michael@0 114 ++mIter;
michael@0 115 }
michael@0 116 tokenEnd = mIter;
michael@0 117
michael@0 118 // Skip whitespace after the current word.
michael@0 119 mWhitespaceAfterCurrentToken = false;
michael@0 120 while (mIter < mEnd && IsWhitespace(*mIter)) {
michael@0 121 mWhitespaceAfterCurrentToken = true;
michael@0 122 ++mIter;
michael@0 123 }
michael@0 124 if (mSeparatorOptional) {
michael@0 125 // We've hit (and skipped) whitespace, and that's sufficient to end
michael@0 126 // our token, regardless of whether we've reached a SeparatorChar.
michael@0 127 break;
michael@0 128 } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
michael@0 129 }
michael@0 130
michael@0 131 mSeparatorAfterCurrentToken = (mIter != mEnd &&
michael@0 132 *mIter == mSeparatorChar);
michael@0 133 MOZ_ASSERT(mSeparatorOptional ||
michael@0 134 (mSeparatorAfterCurrentToken == (mIter < mEnd)),
michael@0 135 "If we require a separator and haven't hit the end of "
michael@0 136 "our string, then we shouldn't have left the loop "
michael@0 137 "unless we hit a separator");
michael@0 138
michael@0 139 // Skip separator (and any whitespace after it), if we're at one.
michael@0 140 if (mSeparatorAfterCurrentToken) {
michael@0 141 ++mIter;
michael@0 142
michael@0 143 while (mIter < mEnd && IsWhitespace(*mIter)) {
michael@0 144 mWhitespaceAfterCurrentToken = true;
michael@0 145 ++mIter;
michael@0 146 }
michael@0 147 }
michael@0 148
michael@0 149 return Substring(tokenStart.get(), tokenEnd.get());
michael@0 150 }
michael@0 151
michael@0 152 private:
michael@0 153 mozilla::RangedPtr<const char16_t> mIter;
michael@0 154 const mozilla::RangedPtr<const char16_t> mEnd;
michael@0 155 char16_t mSeparatorChar;
michael@0 156 bool mWhitespaceBeforeFirstToken;
michael@0 157 bool mWhitespaceAfterCurrentToken;
michael@0 158 bool mSeparatorAfterCurrentToken;
michael@0 159 bool mSeparatorOptional;
michael@0 160 };
michael@0 161
michael@0 162 class nsCharSeparatedTokenizer: public nsCharSeparatedTokenizerTemplate<>
michael@0 163 {
michael@0 164 public:
michael@0 165 nsCharSeparatedTokenizer(const nsSubstring& aSource,
michael@0 166 char16_t aSeparatorChar,
michael@0 167 uint32_t aFlags = 0)
michael@0 168 : nsCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags)
michael@0 169 {
michael@0 170 }
michael@0 171 };
michael@0 172
michael@0 173 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
michael@0 174 class nsCCharSeparatedTokenizerTemplate
michael@0 175 {
michael@0 176 public:
michael@0 177 // Flags -- only one for now. If we need more, they should be defined to
michael@0 178 // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
michael@0 179 enum {
michael@0 180 SEPARATOR_OPTIONAL = 1
michael@0 181 };
michael@0 182
michael@0 183 nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource,
michael@0 184 char aSeparatorChar,
michael@0 185 uint32_t aFlags = 0)
michael@0 186 : mIter(aSource.Data(), aSource.Length()),
michael@0 187 mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
michael@0 188 aSource.Length()),
michael@0 189 mSeparatorChar(aSeparatorChar),
michael@0 190 mWhitespaceBeforeFirstToken(false),
michael@0 191 mWhitespaceAfterCurrentToken(false),
michael@0 192 mSeparatorAfterCurrentToken(false),
michael@0 193 mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
michael@0 194 {
michael@0 195 // Skip initial whitespace
michael@0 196 while (mIter < mEnd && IsWhitespace(*mIter)) {
michael@0 197 mWhitespaceBeforeFirstToken = true;
michael@0 198 ++mIter;
michael@0 199 }
michael@0 200 }
michael@0 201
michael@0 202 /**
michael@0 203 * Checks if any more tokens are available.
michael@0 204 */
michael@0 205 bool hasMoreTokens() const
michael@0 206 {
michael@0 207 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
michael@0 208 "Should be at beginning of token if there is one");
michael@0 209
michael@0 210 return mIter < mEnd;
michael@0 211 }
michael@0 212
michael@0 213 /*
michael@0 214 * Returns true if there is whitespace prior to the first token.
michael@0 215 */
michael@0 216 bool whitespaceBeforeFirstToken() const
michael@0 217 {
michael@0 218 return mWhitespaceBeforeFirstToken;
michael@0 219 }
michael@0 220
michael@0 221 /*
michael@0 222 * Returns true if there is a separator after the current token.
michael@0 223 * Useful if you want to check whether the last token has a separator
michael@0 224 * after it which may not be valid.
michael@0 225 */
michael@0 226 bool separatorAfterCurrentToken() const
michael@0 227 {
michael@0 228 return mSeparatorAfterCurrentToken;
michael@0 229 }
michael@0 230
michael@0 231 /*
michael@0 232 * Returns true if there is any whitespace after the current token.
michael@0 233 */
michael@0 234 bool whitespaceAfterCurrentToken() const
michael@0 235 {
michael@0 236 return mWhitespaceAfterCurrentToken;
michael@0 237 }
michael@0 238
michael@0 239 /**
michael@0 240 * Returns the next token.
michael@0 241 */
michael@0 242 const nsDependentCSubstring nextToken()
michael@0 243 {
michael@0 244 mozilla::RangedPtr<const char> tokenStart = mIter, tokenEnd = mIter;
michael@0 245
michael@0 246 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
michael@0 247 "Should be at beginning of token if there is one");
michael@0 248
michael@0 249 // Search until we hit separator or end (or whitespace, if a separator
michael@0 250 // isn't required -- see clause with 'break' below).
michael@0 251 while (mIter < mEnd && *mIter != mSeparatorChar) {
michael@0 252 // Skip to end of the current word.
michael@0 253 while (mIter < mEnd &&
michael@0 254 !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
michael@0 255 ++mIter;
michael@0 256 }
michael@0 257 tokenEnd = mIter;
michael@0 258
michael@0 259 // Skip whitespace after the current word.
michael@0 260 mWhitespaceAfterCurrentToken = false;
michael@0 261 while (mIter < mEnd && IsWhitespace(*mIter)) {
michael@0 262 mWhitespaceAfterCurrentToken = true;
michael@0 263 ++mIter;
michael@0 264 }
michael@0 265 if (mSeparatorOptional) {
michael@0 266 // We've hit (and skipped) whitespace, and that's sufficient to end
michael@0 267 // our token, regardless of whether we've reached a SeparatorChar.
michael@0 268 break;
michael@0 269 } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
michael@0 270 }
michael@0 271
michael@0 272 mSeparatorAfterCurrentToken = (mIter != mEnd &&
michael@0 273 *mIter == mSeparatorChar);
michael@0 274 MOZ_ASSERT(mSeparatorOptional ||
michael@0 275 (mSeparatorAfterCurrentToken == (mIter < mEnd)),
michael@0 276 "If we require a separator and haven't hit the end of "
michael@0 277 "our string, then we shouldn't have left the loop "
michael@0 278 "unless we hit a separator");
michael@0 279
michael@0 280 // Skip separator (and any whitespace after it), if we're at one.
michael@0 281 if (mSeparatorAfterCurrentToken) {
michael@0 282 ++mIter;
michael@0 283
michael@0 284 while (mIter < mEnd && IsWhitespace(*mIter)) {
michael@0 285 mWhitespaceAfterCurrentToken = true;
michael@0 286 ++mIter;
michael@0 287 }
michael@0 288 }
michael@0 289
michael@0 290 return Substring(tokenStart.get(), tokenEnd.get());
michael@0 291 }
michael@0 292
michael@0 293 private:
michael@0 294 mozilla::RangedPtr<const char> mIter;
michael@0 295 const mozilla::RangedPtr<const char> mEnd;
michael@0 296 char mSeparatorChar;
michael@0 297 bool mWhitespaceBeforeFirstToken;
michael@0 298 bool mWhitespaceAfterCurrentToken;
michael@0 299 bool mSeparatorAfterCurrentToken;
michael@0 300 bool mSeparatorOptional;
michael@0 301 };
michael@0 302
michael@0 303 class nsCCharSeparatedTokenizer: public nsCCharSeparatedTokenizerTemplate<>
michael@0 304 {
michael@0 305 public:
michael@0 306 nsCCharSeparatedTokenizer(const nsCSubstring& aSource,
michael@0 307 char aSeparatorChar,
michael@0 308 uint32_t aFlags = 0)
michael@0 309 : nsCCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags)
michael@0 310 {
michael@0 311 }
michael@0 312 };
michael@0 313
michael@0 314 #endif /* __nsCharSeparatedTokenizer_h */

mercurial