Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef __nsCharSeparatedTokenizer_h
7 #define __nsCharSeparatedTokenizer_h
9 #include "mozilla/RangedPtr.h"
11 #include "nsDependentSubstring.h"
12 #include "nsCRT.h"
14 /**
15 * This parses a SeparatorChar-separated string into tokens.
16 * Whitespace surrounding tokens is not treated as part of tokens, however
17 * whitespace inside a token is. If the final token is the empty string, it is
18 * not returned.
19 *
20 * Some examples, with SeparatorChar = ',':
21 *
22 * "foo, bar, baz" -> "foo" "bar" "baz"
23 * "foo,bar,baz" -> "foo" "bar" "baz"
24 * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
25 * "foo, ,bar,baz" -> "foo" "" "bar" "baz"
26 * "foo,,bar,baz" -> "foo" "" "bar" "baz"
27 * "foo,bar,baz," -> "foo" "bar" "baz"
28 *
29 * The function used for whitespace detection is a template argument.
30 * By default, it is NS_IsAsciiWhitespace.
31 */
32 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
33 class nsCharSeparatedTokenizerTemplate
34 {
35 public:
36 // Flags -- only one for now. If we need more, they should be defined to
37 // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
38 enum {
39 SEPARATOR_OPTIONAL = 1
40 };
42 nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource,
43 char16_t aSeparatorChar,
44 uint32_t aFlags = 0)
45 : mIter(aSource.Data(), aSource.Length()),
46 mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
47 aSource.Length()),
48 mSeparatorChar(aSeparatorChar),
49 mWhitespaceBeforeFirstToken(false),
50 mWhitespaceAfterCurrentToken(false),
51 mSeparatorAfterCurrentToken(false),
52 mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
53 {
54 // Skip initial whitespace
55 while (mIter < mEnd && IsWhitespace(*mIter)) {
56 mWhitespaceBeforeFirstToken = true;
57 ++mIter;
58 }
59 }
61 /**
62 * Checks if any more tokens are available.
63 */
64 bool hasMoreTokens() const
65 {
66 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
67 "Should be at beginning of token if there is one");
69 return mIter < mEnd;
70 }
72 /*
73 * Returns true if there is whitespace prior to the first token.
74 */
75 bool whitespaceBeforeFirstToken() const
76 {
77 return mWhitespaceBeforeFirstToken;
78 }
80 /*
81 * Returns true if there is a separator after the current token.
82 * Useful if you want to check whether the last token has a separator
83 * after it which may not be valid.
84 */
85 bool separatorAfterCurrentToken() const
86 {
87 return mSeparatorAfterCurrentToken;
88 }
90 /*
91 * Returns true if there is any whitespace after the current token.
92 */
93 bool whitespaceAfterCurrentToken() const
94 {
95 return mWhitespaceAfterCurrentToken;
96 }
98 /**
99 * Returns the next token.
100 */
101 const nsDependentSubstring nextToken()
102 {
103 mozilla::RangedPtr<const char16_t> tokenStart = mIter, tokenEnd = mIter;
105 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
106 "Should be at beginning of token if there is one");
108 // Search until we hit separator or end (or whitespace, if a separator
109 // isn't required -- see clause with 'break' below).
110 while (mIter < mEnd && *mIter != mSeparatorChar) {
111 // Skip to end of the current word.
112 while (mIter < mEnd &&
113 !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
114 ++mIter;
115 }
116 tokenEnd = mIter;
118 // Skip whitespace after the current word.
119 mWhitespaceAfterCurrentToken = false;
120 while (mIter < mEnd && IsWhitespace(*mIter)) {
121 mWhitespaceAfterCurrentToken = true;
122 ++mIter;
123 }
124 if (mSeparatorOptional) {
125 // We've hit (and skipped) whitespace, and that's sufficient to end
126 // our token, regardless of whether we've reached a SeparatorChar.
127 break;
128 } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
129 }
131 mSeparatorAfterCurrentToken = (mIter != mEnd &&
132 *mIter == mSeparatorChar);
133 MOZ_ASSERT(mSeparatorOptional ||
134 (mSeparatorAfterCurrentToken == (mIter < mEnd)),
135 "If we require a separator and haven't hit the end of "
136 "our string, then we shouldn't have left the loop "
137 "unless we hit a separator");
139 // Skip separator (and any whitespace after it), if we're at one.
140 if (mSeparatorAfterCurrentToken) {
141 ++mIter;
143 while (mIter < mEnd && IsWhitespace(*mIter)) {
144 mWhitespaceAfterCurrentToken = true;
145 ++mIter;
146 }
147 }
149 return Substring(tokenStart.get(), tokenEnd.get());
150 }
152 private:
153 mozilla::RangedPtr<const char16_t> mIter;
154 const mozilla::RangedPtr<const char16_t> mEnd;
155 char16_t mSeparatorChar;
156 bool mWhitespaceBeforeFirstToken;
157 bool mWhitespaceAfterCurrentToken;
158 bool mSeparatorAfterCurrentToken;
159 bool mSeparatorOptional;
160 };
162 class nsCharSeparatedTokenizer: public nsCharSeparatedTokenizerTemplate<>
163 {
164 public:
165 nsCharSeparatedTokenizer(const nsSubstring& aSource,
166 char16_t aSeparatorChar,
167 uint32_t aFlags = 0)
168 : nsCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags)
169 {
170 }
171 };
173 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
174 class nsCCharSeparatedTokenizerTemplate
175 {
176 public:
177 // Flags -- only one for now. If we need more, they should be defined to
178 // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
179 enum {
180 SEPARATOR_OPTIONAL = 1
181 };
183 nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource,
184 char aSeparatorChar,
185 uint32_t aFlags = 0)
186 : mIter(aSource.Data(), aSource.Length()),
187 mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
188 aSource.Length()),
189 mSeparatorChar(aSeparatorChar),
190 mWhitespaceBeforeFirstToken(false),
191 mWhitespaceAfterCurrentToken(false),
192 mSeparatorAfterCurrentToken(false),
193 mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
194 {
195 // Skip initial whitespace
196 while (mIter < mEnd && IsWhitespace(*mIter)) {
197 mWhitespaceBeforeFirstToken = true;
198 ++mIter;
199 }
200 }
202 /**
203 * Checks if any more tokens are available.
204 */
205 bool hasMoreTokens() const
206 {
207 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
208 "Should be at beginning of token if there is one");
210 return mIter < mEnd;
211 }
213 /*
214 * Returns true if there is whitespace prior to the first token.
215 */
216 bool whitespaceBeforeFirstToken() const
217 {
218 return mWhitespaceBeforeFirstToken;
219 }
221 /*
222 * Returns true if there is a separator after the current token.
223 * Useful if you want to check whether the last token has a separator
224 * after it which may not be valid.
225 */
226 bool separatorAfterCurrentToken() const
227 {
228 return mSeparatorAfterCurrentToken;
229 }
231 /*
232 * Returns true if there is any whitespace after the current token.
233 */
234 bool whitespaceAfterCurrentToken() const
235 {
236 return mWhitespaceAfterCurrentToken;
237 }
239 /**
240 * Returns the next token.
241 */
242 const nsDependentCSubstring nextToken()
243 {
244 mozilla::RangedPtr<const char> tokenStart = mIter, tokenEnd = mIter;
246 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
247 "Should be at beginning of token if there is one");
249 // Search until we hit separator or end (or whitespace, if a separator
250 // isn't required -- see clause with 'break' below).
251 while (mIter < mEnd && *mIter != mSeparatorChar) {
252 // Skip to end of the current word.
253 while (mIter < mEnd &&
254 !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
255 ++mIter;
256 }
257 tokenEnd = mIter;
259 // Skip whitespace after the current word.
260 mWhitespaceAfterCurrentToken = false;
261 while (mIter < mEnd && IsWhitespace(*mIter)) {
262 mWhitespaceAfterCurrentToken = true;
263 ++mIter;
264 }
265 if (mSeparatorOptional) {
266 // We've hit (and skipped) whitespace, and that's sufficient to end
267 // our token, regardless of whether we've reached a SeparatorChar.
268 break;
269 } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
270 }
272 mSeparatorAfterCurrentToken = (mIter != mEnd &&
273 *mIter == mSeparatorChar);
274 MOZ_ASSERT(mSeparatorOptional ||
275 (mSeparatorAfterCurrentToken == (mIter < mEnd)),
276 "If we require a separator and haven't hit the end of "
277 "our string, then we shouldn't have left the loop "
278 "unless we hit a separator");
280 // Skip separator (and any whitespace after it), if we're at one.
281 if (mSeparatorAfterCurrentToken) {
282 ++mIter;
284 while (mIter < mEnd && IsWhitespace(*mIter)) {
285 mWhitespaceAfterCurrentToken = true;
286 ++mIter;
287 }
288 }
290 return Substring(tokenStart.get(), tokenEnd.get());
291 }
293 private:
294 mozilla::RangedPtr<const char> mIter;
295 const mozilla::RangedPtr<const char> mEnd;
296 char mSeparatorChar;
297 bool mWhitespaceBeforeFirstToken;
298 bool mWhitespaceAfterCurrentToken;
299 bool mSeparatorAfterCurrentToken;
300 bool mSeparatorOptional;
301 };
303 class nsCCharSeparatedTokenizer: public nsCCharSeparatedTokenizerTemplate<>
304 {
305 public:
306 nsCCharSeparatedTokenizer(const nsCSubstring& aSource,
307 char aSeparatorChar,
308 uint32_t aFlags = 0)
309 : nsCCharSeparatedTokenizerTemplate<>(aSource, aSeparatorChar, aFlags)
310 {
311 }
312 };
314 #endif /* __nsCharSeparatedTokenizer_h */