Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #ifndef __nsCharSeparatedTokenizer_h |
michael@0 | 7 | #define __nsCharSeparatedTokenizer_h |
michael@0 | 8 | |
michael@0 | 9 | #include "mozilla/RangedPtr.h" |
michael@0 | 10 | |
michael@0 | 11 | #include "nsDependentSubstring.h" |
michael@0 | 12 | #include "nsCRT.h" |
michael@0 | 13 | |
michael@0 | 14 | /** |
michael@0 | 15 | * This parses a SeparatorChar-separated string into tokens. |
michael@0 | 16 | * Whitespace surrounding tokens is not treated as part of tokens, however |
michael@0 | 17 | * whitespace inside a token is. If the final token is the empty string, it is |
michael@0 | 18 | * not returned. |
michael@0 | 19 | * |
michael@0 | 20 | * Some examples, with SeparatorChar = ',': |
michael@0 | 21 | * |
michael@0 | 22 | * "foo, bar, baz" -> "foo" "bar" "baz" |
michael@0 | 23 | * "foo,bar,baz" -> "foo" "bar" "baz" |
michael@0 | 24 | * "foo , bar hi , baz" -> "foo" "bar hi" "baz" |
michael@0 | 25 | * "foo, ,bar,baz" -> "foo" "" "bar" "baz" |
michael@0 | 26 | * "foo,,bar,baz" -> "foo" "" "bar" "baz" |
michael@0 | 27 | * "foo,bar,baz," -> "foo" "bar" "baz" |
michael@0 | 28 | * |
michael@0 | 29 | * The function used for whitespace detection is a template argument. |
michael@0 | 30 | * By default, it is NS_IsAsciiWhitespace. |
michael@0 | 31 | */ |
michael@0 | 32 | template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace> |
michael@0 | 33 | class nsCharSeparatedTokenizerTemplate |
michael@0 | 34 | { |
michael@0 | 35 | public: |
michael@0 | 36 | // Flags -- only one for now. If we need more, they should be defined to |
michael@0 | 37 | // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.) |
michael@0 | 38 | enum { |
michael@0 | 39 | SEPARATOR_OPTIONAL = 1 |
michael@0 | 40 | }; |
michael@0 | 41 | |
michael@0 | 42 | nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource, |
michael@0 | 43 | char16_t aSeparatorChar, |
michael@0 | 44 | uint32_t aFlags = 0) |
michael@0 | 45 | : mIter(aSource.Data(), aSource.Length()), |
michael@0 | 46 | mEnd(aSource.Data() + aSource.Length(), aSource.Data(), |
michael@0 | 47 | aSource.Length()), |
michael@0 | 48 | mSeparatorChar(aSeparatorChar), |
michael@0 | 49 | mWhitespaceBeforeFirstToken(false), |
michael@0 | 50 | mWhitespaceAfterCurrentToken(false), |
michael@0 | 51 | mSeparatorAfterCurrentToken(false), |
michael@0 | 52 | mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL) |
michael@0 | 53 | { |
michael@0 | 54 | // Skip initial whitespace |
michael@0 | 55 | while (mIter < mEnd && IsWhitespace(*mIter)) { |
michael@0 | 56 | mWhitespaceBeforeFirstToken = true; |
michael@0 | 57 | ++mIter; |
michael@0 | 58 | } |
michael@0 | 59 | } |
michael@0 | 60 | |
michael@0 | 61 | /** |
michael@0 | 62 | * Checks if any more tokens are available. |
michael@0 | 63 | */ |
michael@0 | 64 | bool hasMoreTokens() const |
michael@0 | 65 | { |
michael@0 | 66 | MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), |
michael@0 | 67 | "Should be at beginning of token if there is one"); |
michael@0 | 68 | |
michael@0 | 69 | return mIter < mEnd; |
michael@0 | 70 | } |
michael@0 | 71 | |
michael@0 | 72 | /* |
michael@0 | 73 | * Returns true if there is whitespace prior to the first token. |
michael@0 | 74 | */ |
michael@0 | 75 | bool whitespaceBeforeFirstToken() const |
michael@0 | 76 | { |
michael@0 | 77 | return mWhitespaceBeforeFirstToken; |
michael@0 | 78 | } |
michael@0 | 79 | |
michael@0 | 80 | /* |
michael@0 | 81 | * Returns true if there is a separator after the current token. |
michael@0 | 82 | * Useful if you want to check whether the last token has a separator |
michael@0 | 83 | * after it which may not be valid. |
michael@0 | 84 | */ |
michael@0 | 85 | bool separatorAfterCurrentToken() const |
michael@0 | 86 | { |
michael@0 | 87 | return mSeparatorAfterCurrentToken; |
michael@0 | 88 | } |
michael@0 | 89 | |
michael@0 | 90 | /* |
michael@0 | 91 | * Returns true if there is any whitespace after the current token. |
michael@0 | 92 | */ |
michael@0 | 93 | bool whitespaceAfterCurrentToken() const |
michael@0 | 94 | { |
michael@0 | 95 | return mWhitespaceAfterCurrentToken; |
michael@0 | 96 | } |
michael@0 | 97 | |
michael@0 | 98 | /** |
michael@0 | 99 | * Returns the next token. |
michael@0 | 100 | */ |
michael@0 | 101 | const nsDependentSubstring nextToken() |
michael@0 | 102 | { |
michael@0 | 103 | mozilla::RangedPtr<const char16_t> tokenStart = mIter, tokenEnd = mIter; |
michael@0 | 104 | |
michael@0 | 105 | MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), |
michael@0 | 106 | "Should be at beginning of token if there is one"); |
michael@0 | 107 | |
michael@0 | 108 | // Search until we hit separator or end (or whitespace, if a separator |
michael@0 | 109 | // isn't required -- see clause with 'break' below). |
michael@0 | 110 | while (mIter < mEnd && *mIter != mSeparatorChar) { |
michael@0 | 111 | // Skip to end of the current word. |
michael@0 | 112 | while (mIter < mEnd && |
michael@0 | 113 | !IsWhitespace(*mIter) && *mIter != mSeparatorChar) { |
michael@0 | 114 | ++mIter; |
michael@0 | 115 | } |
michael@0 | 116 | tokenEnd = mIter; |
michael@0 | 117 | |
michael@0 | 118 | // Skip whitespace after the current word. |
michael@0 | 119 | mWhitespaceAfterCurrentToken = false; |
michael@0 | 120 | while (mIter < mEnd && IsWhitespace(*mIter)) { |
michael@0 | 121 | mWhitespaceAfterCurrentToken = true; |
michael@0 | 122 | ++mIter; |
michael@0 | 123 | } |
michael@0 | 124 | if (mSeparatorOptional) { |
michael@0 | 125 | // We've hit (and skipped) whitespace, and that's sufficient to end |
michael@0 | 126 | // our token, regardless of whether we've reached a SeparatorChar. |
michael@0 | 127 | break; |
michael@0 | 128 | } // (else, we'll keep looping until we hit mEnd or SeparatorChar) |
michael@0 | 129 | } |
michael@0 | 130 | |
michael@0 | 131 | mSeparatorAfterCurrentToken = (mIter != mEnd && |
michael@0 | 132 | *mIter == mSeparatorChar); |
michael@0 | 133 | MOZ_ASSERT(mSeparatorOptional || |
michael@0 | 134 | (mSeparatorAfterCurrentToken == (mIter < mEnd)), |
michael@0 | 135 | "If we require a separator and haven't hit the end of " |
michael@0 | 136 | "our string, then we shouldn't have left the loop " |
michael@0 | 137 | "unless we hit a separator"); |
michael@0 | 138 | |
michael@0 | 139 | // Skip separator (and any whitespace after it), if we're at one. |
michael@0 | 140 | if (mSeparatorAfterCurrentToken) { |
michael@0 | 141 | ++mIter; |
michael@0 | 142 | |
michael@0 | 143 | while (mIter < mEnd && IsWhitespace(*mIter)) { |
michael@0 | 144 | mWhitespaceAfterCurrentToken = true; |
michael@0 | 145 | ++mIter; |
michael@0 | 146 | } |
michael@0 | 147 | } |
michael@0 | 148 | |
michael@0 | 149 | return Substring(tokenStart.get(), tokenEnd.get()); |
michael@0 | 150 | } |
michael@0 | 151 | |
michael@0 | 152 | private: |
michael@0 | 153 | mozilla::RangedPtr<const char16_t> mIter; |
michael@0 | 154 | const mozilla::RangedPtr<const char16_t> mEnd; |
michael@0 | 155 | char16_t mSeparatorChar; |
michael@0 | 156 | bool mWhitespaceBeforeFirstToken; |
michael@0 | 157 | bool mWhitespaceAfterCurrentToken; |
michael@0 | 158 | bool mSeparatorAfterCurrentToken; |
michael@0 | 159 | bool mSeparatorOptional; |
michael@0 | 160 | }; |
michael@0 | 161 | |
michael@0 | 162 | class nsCharSeparatedTokenizer: public nsCharSeparatedTokenizerTemplate<> |
michael@0 | 163 | { |
michael@0 | 164 | public: |
michael@0 | 165 | nsCharSeparatedTokenizer(const nsSubstring& aSource, |
michael@0 | 166 | char16_t aSeparatorChar, |
michael@0 | 167 | uint32_t aFlags = 0) |
michael@0 | 168 | : nsCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags) |
michael@0 | 169 | { |
michael@0 | 170 | } |
michael@0 | 171 | }; |
michael@0 | 172 | |
michael@0 | 173 | template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace> |
michael@0 | 174 | class nsCCharSeparatedTokenizerTemplate |
michael@0 | 175 | { |
michael@0 | 176 | public: |
michael@0 | 177 | // Flags -- only one for now. If we need more, they should be defined to |
michael@0 | 178 | // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.) |
michael@0 | 179 | enum { |
michael@0 | 180 | SEPARATOR_OPTIONAL = 1 |
michael@0 | 181 | }; |
michael@0 | 182 | |
michael@0 | 183 | nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource, |
michael@0 | 184 | char aSeparatorChar, |
michael@0 | 185 | uint32_t aFlags = 0) |
michael@0 | 186 | : mIter(aSource.Data(), aSource.Length()), |
michael@0 | 187 | mEnd(aSource.Data() + aSource.Length(), aSource.Data(), |
michael@0 | 188 | aSource.Length()), |
michael@0 | 189 | mSeparatorChar(aSeparatorChar), |
michael@0 | 190 | mWhitespaceBeforeFirstToken(false), |
michael@0 | 191 | mWhitespaceAfterCurrentToken(false), |
michael@0 | 192 | mSeparatorAfterCurrentToken(false), |
michael@0 | 193 | mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL) |
michael@0 | 194 | { |
michael@0 | 195 | // Skip initial whitespace |
michael@0 | 196 | while (mIter < mEnd && IsWhitespace(*mIter)) { |
michael@0 | 197 | mWhitespaceBeforeFirstToken = true; |
michael@0 | 198 | ++mIter; |
michael@0 | 199 | } |
michael@0 | 200 | } |
michael@0 | 201 | |
michael@0 | 202 | /** |
michael@0 | 203 | * Checks if any more tokens are available. |
michael@0 | 204 | */ |
michael@0 | 205 | bool hasMoreTokens() const |
michael@0 | 206 | { |
michael@0 | 207 | MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), |
michael@0 | 208 | "Should be at beginning of token if there is one"); |
michael@0 | 209 | |
michael@0 | 210 | return mIter < mEnd; |
michael@0 | 211 | } |
michael@0 | 212 | |
michael@0 | 213 | /* |
michael@0 | 214 | * Returns true if there is whitespace prior to the first token. |
michael@0 | 215 | */ |
michael@0 | 216 | bool whitespaceBeforeFirstToken() const |
michael@0 | 217 | { |
michael@0 | 218 | return mWhitespaceBeforeFirstToken; |
michael@0 | 219 | } |
michael@0 | 220 | |
michael@0 | 221 | /* |
michael@0 | 222 | * Returns true if there is a separator after the current token. |
michael@0 | 223 | * Useful if you want to check whether the last token has a separator |
michael@0 | 224 | * after it which may not be valid. |
michael@0 | 225 | */ |
michael@0 | 226 | bool separatorAfterCurrentToken() const |
michael@0 | 227 | { |
michael@0 | 228 | return mSeparatorAfterCurrentToken; |
michael@0 | 229 | } |
michael@0 | 230 | |
michael@0 | 231 | /* |
michael@0 | 232 | * Returns true if there is any whitespace after the current token. |
michael@0 | 233 | */ |
michael@0 | 234 | bool whitespaceAfterCurrentToken() const |
michael@0 | 235 | { |
michael@0 | 236 | return mWhitespaceAfterCurrentToken; |
michael@0 | 237 | } |
michael@0 | 238 | |
michael@0 | 239 | /** |
michael@0 | 240 | * Returns the next token. |
michael@0 | 241 | */ |
michael@0 | 242 | const nsDependentCSubstring nextToken() |
michael@0 | 243 | { |
michael@0 | 244 | mozilla::RangedPtr<const char> tokenStart = mIter, tokenEnd = mIter; |
michael@0 | 245 | |
michael@0 | 246 | MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), |
michael@0 | 247 | "Should be at beginning of token if there is one"); |
michael@0 | 248 | |
michael@0 | 249 | // Search until we hit separator or end (or whitespace, if a separator |
michael@0 | 250 | // isn't required -- see clause with 'break' below). |
michael@0 | 251 | while (mIter < mEnd && *mIter != mSeparatorChar) { |
michael@0 | 252 | // Skip to end of the current word. |
michael@0 | 253 | while (mIter < mEnd && |
michael@0 | 254 | !IsWhitespace(*mIter) && *mIter != mSeparatorChar) { |
michael@0 | 255 | ++mIter; |
michael@0 | 256 | } |
michael@0 | 257 | tokenEnd = mIter; |
michael@0 | 258 | |
michael@0 | 259 | // Skip whitespace after the current word. |
michael@0 | 260 | mWhitespaceAfterCurrentToken = false; |
michael@0 | 261 | while (mIter < mEnd && IsWhitespace(*mIter)) { |
michael@0 | 262 | mWhitespaceAfterCurrentToken = true; |
michael@0 | 263 | ++mIter; |
michael@0 | 264 | } |
michael@0 | 265 | if (mSeparatorOptional) { |
michael@0 | 266 | // We've hit (and skipped) whitespace, and that's sufficient to end |
michael@0 | 267 | // our token, regardless of whether we've reached a SeparatorChar. |
michael@0 | 268 | break; |
michael@0 | 269 | } // (else, we'll keep looping until we hit mEnd or SeparatorChar) |
michael@0 | 270 | } |
michael@0 | 271 | |
michael@0 | 272 | mSeparatorAfterCurrentToken = (mIter != mEnd && |
michael@0 | 273 | *mIter == mSeparatorChar); |
michael@0 | 274 | MOZ_ASSERT(mSeparatorOptional || |
michael@0 | 275 | (mSeparatorAfterCurrentToken == (mIter < mEnd)), |
michael@0 | 276 | "If we require a separator and haven't hit the end of " |
michael@0 | 277 | "our string, then we shouldn't have left the loop " |
michael@0 | 278 | "unless we hit a separator"); |
michael@0 | 279 | |
michael@0 | 280 | // Skip separator (and any whitespace after it), if we're at one. |
michael@0 | 281 | if (mSeparatorAfterCurrentToken) { |
michael@0 | 282 | ++mIter; |
michael@0 | 283 | |
michael@0 | 284 | while (mIter < mEnd && IsWhitespace(*mIter)) { |
michael@0 | 285 | mWhitespaceAfterCurrentToken = true; |
michael@0 | 286 | ++mIter; |
michael@0 | 287 | } |
michael@0 | 288 | } |
michael@0 | 289 | |
michael@0 | 290 | return Substring(tokenStart.get(), tokenEnd.get()); |
michael@0 | 291 | } |
michael@0 | 292 | |
michael@0 | 293 | private: |
michael@0 | 294 | mozilla::RangedPtr<const char> mIter; |
michael@0 | 295 | const mozilla::RangedPtr<const char> mEnd; |
michael@0 | 296 | char mSeparatorChar; |
michael@0 | 297 | bool mWhitespaceBeforeFirstToken; |
michael@0 | 298 | bool mWhitespaceAfterCurrentToken; |
michael@0 | 299 | bool mSeparatorAfterCurrentToken; |
michael@0 | 300 | bool mSeparatorOptional; |
michael@0 | 301 | }; |
michael@0 | 302 | |
michael@0 | 303 | class nsCCharSeparatedTokenizer: public nsCCharSeparatedTokenizerTemplate<> |
michael@0 | 304 | { |
michael@0 | 305 | public: |
michael@0 | 306 | nsCCharSeparatedTokenizer(const nsCSubstring& aSource, |
michael@0 | 307 | char aSeparatorChar, |
michael@0 | 308 | uint32_t aFlags = 0) |
michael@0 | 309 | : nsCCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags) |
michael@0 | 310 | { |
michael@0 | 311 | } |
michael@0 | 312 | }; |
michael@0 | 313 | |
michael@0 | 314 | #endif /* __nsCharSeparatedTokenizer_h */ |