1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/xpcom/ds/nsCharSeparatedTokenizer.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,314 @@ 1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#ifndef __nsCharSeparatedTokenizer_h 1.10 +#define __nsCharSeparatedTokenizer_h 1.11 + 1.12 +#include "mozilla/RangedPtr.h" 1.13 + 1.14 +#include "nsDependentSubstring.h" 1.15 +#include "nsCRT.h" 1.16 + 1.17 +/** 1.18 + * This parses a SeparatorChar-separated string into tokens. 1.19 + * Whitespace surrounding tokens is not treated as part of tokens, however 1.20 + * whitespace inside a token is. If the final token is the empty string, it is 1.21 + * not returned. 1.22 + * 1.23 + * Some examples, with SeparatorChar = ',': 1.24 + * 1.25 + * "foo, bar, baz" -> "foo" "bar" "baz" 1.26 + * "foo,bar,baz" -> "foo" "bar" "baz" 1.27 + * "foo , bar hi , baz" -> "foo" "bar hi" "baz" 1.28 + * "foo, ,bar,baz" -> "foo" "" "bar" "baz" 1.29 + * "foo,,bar,baz" -> "foo" "" "bar" "baz" 1.30 + * "foo,bar,baz," -> "foo" "bar" "baz" 1.31 + * 1.32 + * The function used for whitespace detection is a template argument. 1.33 + * By default, it is NS_IsAsciiWhitespace. 1.34 + */ 1.35 +template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace> 1.36 +class nsCharSeparatedTokenizerTemplate 1.37 +{ 1.38 +public: 1.39 + // Flags -- only one for now. If we need more, they should be defined to 1.40 + // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.) 1.41 + enum { 1.42 + SEPARATOR_OPTIONAL = 1 1.43 + }; 1.44 + 1.45 + nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource, 1.46 + char16_t aSeparatorChar, 1.47 + uint32_t aFlags = 0) 1.48 + : mIter(aSource.Data(), aSource.Length()), 1.49 + mEnd(aSource.Data() + aSource.Length(), aSource.Data(), 1.50 + aSource.Length()), 1.51 + mSeparatorChar(aSeparatorChar), 1.52 + mWhitespaceBeforeFirstToken(false), 1.53 + mWhitespaceAfterCurrentToken(false), 1.54 + mSeparatorAfterCurrentToken(false), 1.55 + mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL) 1.56 + { 1.57 + // Skip initial whitespace 1.58 + while (mIter < mEnd && IsWhitespace(*mIter)) { 1.59 + mWhitespaceBeforeFirstToken = true; 1.60 + ++mIter; 1.61 + } 1.62 + } 1.63 + 1.64 + /** 1.65 + * Checks if any more tokens are available. 1.66 + */ 1.67 + bool hasMoreTokens() const 1.68 + { 1.69 + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), 1.70 + "Should be at beginning of token if there is one"); 1.71 + 1.72 + return mIter < mEnd; 1.73 + } 1.74 + 1.75 + /* 1.76 + * Returns true if there is whitespace prior to the first token. 1.77 + */ 1.78 + bool whitespaceBeforeFirstToken() const 1.79 + { 1.80 + return mWhitespaceBeforeFirstToken; 1.81 + } 1.82 + 1.83 + /* 1.84 + * Returns true if there is a separator after the current token. 1.85 + * Useful if you want to check whether the last token has a separator 1.86 + * after it which may not be valid. 1.87 + */ 1.88 + bool separatorAfterCurrentToken() const 1.89 + { 1.90 + return mSeparatorAfterCurrentToken; 1.91 + } 1.92 + 1.93 + /* 1.94 + * Returns true if there is any whitespace after the current token. 1.95 + */ 1.96 + bool whitespaceAfterCurrentToken() const 1.97 + { 1.98 + return mWhitespaceAfterCurrentToken; 1.99 + } 1.100 + 1.101 + /** 1.102 + * Returns the next token. 1.103 + */ 1.104 + const nsDependentSubstring nextToken() 1.105 + { 1.106 + mozilla::RangedPtr<const char16_t> tokenStart = mIter, tokenEnd = mIter; 1.107 + 1.108 + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), 1.109 + "Should be at beginning of token if there is one"); 1.110 + 1.111 + // Search until we hit separator or end (or whitespace, if a separator 1.112 + // isn't required -- see clause with 'break' below). 1.113 + while (mIter < mEnd && *mIter != mSeparatorChar) { 1.114 + // Skip to end of the current word. 1.115 + while (mIter < mEnd && 1.116 + !IsWhitespace(*mIter) && *mIter != mSeparatorChar) { 1.117 + ++mIter; 1.118 + } 1.119 + tokenEnd = mIter; 1.120 + 1.121 + // Skip whitespace after the current word. 1.122 + mWhitespaceAfterCurrentToken = false; 1.123 + while (mIter < mEnd && IsWhitespace(*mIter)) { 1.124 + mWhitespaceAfterCurrentToken = true; 1.125 + ++mIter; 1.126 + } 1.127 + if (mSeparatorOptional) { 1.128 + // We've hit (and skipped) whitespace, and that's sufficient to end 1.129 + // our token, regardless of whether we've reached a SeparatorChar. 1.130 + break; 1.131 + } // (else, we'll keep looping until we hit mEnd or SeparatorChar) 1.132 + } 1.133 + 1.134 + mSeparatorAfterCurrentToken = (mIter != mEnd && 1.135 + *mIter == mSeparatorChar); 1.136 + MOZ_ASSERT(mSeparatorOptional || 1.137 + (mSeparatorAfterCurrentToken == (mIter < mEnd)), 1.138 + "If we require a separator and haven't hit the end of " 1.139 + "our string, then we shouldn't have left the loop " 1.140 + "unless we hit a separator"); 1.141 + 1.142 + // Skip separator (and any whitespace after it), if we're at one. 1.143 + if (mSeparatorAfterCurrentToken) { 1.144 + ++mIter; 1.145 + 1.146 + while (mIter < mEnd && IsWhitespace(*mIter)) { 1.147 + mWhitespaceAfterCurrentToken = true; 1.148 + ++mIter; 1.149 + } 1.150 + } 1.151 + 1.152 + return Substring(tokenStart.get(), tokenEnd.get()); 1.153 + } 1.154 + 1.155 +private: 1.156 + mozilla::RangedPtr<const char16_t> mIter; 1.157 + const mozilla::RangedPtr<const char16_t> mEnd; 1.158 + char16_t mSeparatorChar; 1.159 + bool mWhitespaceBeforeFirstToken; 1.160 + bool mWhitespaceAfterCurrentToken; 1.161 + bool mSeparatorAfterCurrentToken; 1.162 + bool mSeparatorOptional; 1.163 +}; 1.164 + 1.165 +class nsCharSeparatedTokenizer: public nsCharSeparatedTokenizerTemplate<> 1.166 +{ 1.167 +public: 1.168 + nsCharSeparatedTokenizer(const nsSubstring& aSource, 1.169 + char16_t aSeparatorChar, 1.170 + uint32_t aFlags = 0) 1.171 + : nsCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags) 1.172 + { 1.173 + } 1.174 +}; 1.175 + 1.176 +template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace> 1.177 +class nsCCharSeparatedTokenizerTemplate 1.178 +{ 1.179 +public: 1.180 + // Flags -- only one for now. If we need more, they should be defined to 1.181 + // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.) 1.182 + enum { 1.183 + SEPARATOR_OPTIONAL = 1 1.184 + }; 1.185 + 1.186 + nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource, 1.187 + char aSeparatorChar, 1.188 + uint32_t aFlags = 0) 1.189 + : mIter(aSource.Data(), aSource.Length()), 1.190 + mEnd(aSource.Data() + aSource.Length(), aSource.Data(), 1.191 + aSource.Length()), 1.192 + mSeparatorChar(aSeparatorChar), 1.193 + mWhitespaceBeforeFirstToken(false), 1.194 + mWhitespaceAfterCurrentToken(false), 1.195 + mSeparatorAfterCurrentToken(false), 1.196 + mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL) 1.197 + { 1.198 + // Skip initial whitespace 1.199 + while (mIter < mEnd && IsWhitespace(*mIter)) { 1.200 + mWhitespaceBeforeFirstToken = true; 1.201 + ++mIter; 1.202 + } 1.203 + } 1.204 + 1.205 + /** 1.206 + * Checks if any more tokens are available. 1.207 + */ 1.208 + bool hasMoreTokens() const 1.209 + { 1.210 + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), 1.211 + "Should be at beginning of token if there is one"); 1.212 + 1.213 + return mIter < mEnd; 1.214 + } 1.215 + 1.216 + /* 1.217 + * Returns true if there is whitespace prior to the first token. 1.218 + */ 1.219 + bool whitespaceBeforeFirstToken() const 1.220 + { 1.221 + return mWhitespaceBeforeFirstToken; 1.222 + } 1.223 + 1.224 + /* 1.225 + * Returns true if there is a separator after the current token. 1.226 + * Useful if you want to check whether the last token has a separator 1.227 + * after it which may not be valid. 1.228 + */ 1.229 + bool separatorAfterCurrentToken() const 1.230 + { 1.231 + return mSeparatorAfterCurrentToken; 1.232 + } 1.233 + 1.234 + /* 1.235 + * Returns true if there is any whitespace after the current token. 1.236 + */ 1.237 + bool whitespaceAfterCurrentToken() const 1.238 + { 1.239 + return mWhitespaceAfterCurrentToken; 1.240 + } 1.241 + 1.242 + /** 1.243 + * Returns the next token. 1.244 + */ 1.245 + const nsDependentCSubstring nextToken() 1.246 + { 1.247 + mozilla::RangedPtr<const char> tokenStart = mIter, tokenEnd = mIter; 1.248 + 1.249 + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), 1.250 + "Should be at beginning of token if there is one"); 1.251 + 1.252 + // Search until we hit separator or end (or whitespace, if a separator 1.253 + // isn't required -- see clause with 'break' below). 1.254 + while (mIter < mEnd && *mIter != mSeparatorChar) { 1.255 + // Skip to end of the current word. 1.256 + while (mIter < mEnd && 1.257 + !IsWhitespace(*mIter) && *mIter != mSeparatorChar) { 1.258 + ++mIter; 1.259 + } 1.260 + tokenEnd = mIter; 1.261 + 1.262 + // Skip whitespace after the current word. 1.263 + mWhitespaceAfterCurrentToken = false; 1.264 + while (mIter < mEnd && IsWhitespace(*mIter)) { 1.265 + mWhitespaceAfterCurrentToken = true; 1.266 + ++mIter; 1.267 + } 1.268 + if (mSeparatorOptional) { 1.269 + // We've hit (and skipped) whitespace, and that's sufficient to end 1.270 + // our token, regardless of whether we've reached a SeparatorChar. 1.271 + break; 1.272 + } // (else, we'll keep looping until we hit mEnd or SeparatorChar) 1.273 + } 1.274 + 1.275 + mSeparatorAfterCurrentToken = (mIter != mEnd && 1.276 + *mIter == mSeparatorChar); 1.277 + MOZ_ASSERT(mSeparatorOptional || 1.278 + (mSeparatorAfterCurrentToken == (mIter < mEnd)), 1.279 + "If we require a separator and haven't hit the end of " 1.280 + "our string, then we shouldn't have left the loop " 1.281 + "unless we hit a separator"); 1.282 + 1.283 + // Skip separator (and any whitespace after it), if we're at one. 1.284 + if (mSeparatorAfterCurrentToken) { 1.285 + ++mIter; 1.286 + 1.287 + while (mIter < mEnd && IsWhitespace(*mIter)) { 1.288 + mWhitespaceAfterCurrentToken = true; 1.289 + ++mIter; 1.290 + } 1.291 + } 1.292 + 1.293 + return Substring(tokenStart.get(), tokenEnd.get()); 1.294 + } 1.295 + 1.296 +private: 1.297 + mozilla::RangedPtr<const char> mIter; 1.298 + const mozilla::RangedPtr<const char> mEnd; 1.299 + char mSeparatorChar; 1.300 + bool mWhitespaceBeforeFirstToken; 1.301 + bool mWhitespaceAfterCurrentToken; 1.302 + bool mSeparatorAfterCurrentToken; 1.303 + bool mSeparatorOptional; 1.304 +}; 1.305 + 1.306 +class nsCCharSeparatedTokenizer: public nsCCharSeparatedTokenizerTemplate<> 1.307 +{ 1.308 +public: 1.309 + nsCCharSeparatedTokenizer(const nsCSubstring& aSource, 1.310 + char aSeparatorChar, 1.311 + uint32_t aFlags = 0) 1.312 + : nsCCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags) 1.313 + { 1.314 + } 1.315 +}; 1.316 + 1.317 +#endif /* __nsCharSeparatedTokenizer_h */