// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_STRING_TOKENIZER_H_
#define BASE_STRING_TOKENIZER_H_

#include <string>

// StringTokenizerT is a simple string tokenizer class.  It works like an
// iterator that with each step (see the GetNext method) updates members that
// refer to the next token in the input string.  The user may optionally
// configure the tokenizer to return delimiters.
//
// The tokenizer does not copy the input: it only stores iterators into it.
// The input must therefore stay alive and unmodified for as long as the
// tokenizer is used.  In particular, do NOT pass a temporary (for example a
// string literal that is implicitly converted to std::string) as the input.
//
//
// EXAMPLE 1:
//
//   std::string input = "this is a test";
//   StringTokenizer t(input, " ");
//   while (t.GetNext()) {
//     printf("%s\n", t.token().c_str());
//   }
//
// Output:
//
//   this
//   is
//   a
//   test
//
//
// EXAMPLE 2:
//
//   std::string input = "no-cache=\"foo, bar\", private";
//   StringTokenizer t(input, ", ");
//   t.set_quote_chars("\"");
//   while (t.GetNext()) {
//     printf("%s\n", t.token().c_str());
//   }
//
// Output:
//
//   no-cache="foo, bar"
//   private
//
//
// EXAMPLE 3:
//
//   bool next_is_option = false, next_is_value = false;
//   std::string input = "text/html; charset=UTF-8; foo=bar";
//   StringTokenizer t(input, "; =");
//   t.set_options(StringTokenizer::RETURN_DELIMS);
//   while (t.GetNext()) {
//     if (t.token_is_delim()) {
//       switch (*t.token_begin()) {
//         case ';':
//           next_is_option = true;
//           break;
//         case '=':
//           next_is_value = true;
//           break;
//       }
//     } else {
//       const char* label;
//       if (next_is_option) {
//         label = "option-name";
//         next_is_option = false;
//       } else if (next_is_value) {
//         label = "option-value";
//         next_is_value = false;
//       } else {
//         label = "mime-type";
//       }
//       printf("%s: %s\n", label, t.token().c_str());
//     }
//   }
//
//
template <class str, class const_iterator>
class StringTokenizerT {
 public:
  typedef typename str::value_type char_type;

  // Options that may be passed to set_options()
  enum {
    // Specifies the delimiters should be returned as tokens
    RETURN_DELIMS = 1 << 0,
  };

  // Tokenizes |string| using any character of |delims| as a delimiter.
  // |delims| is copied; |string| is NOT copied and must outlive the
  // tokenizer (see the class comment).
  StringTokenizerT(const str& string,
                   const str& delims) {
    Init(string.begin(), string.end(), delims);
  }

  // Tokenizes the range [string_begin, string_end).  The range must remain
  // valid for as long as the tokenizer is used.
  StringTokenizerT(const_iterator string_begin,
                   const_iterator string_end,
                   const str& delims) {
    Init(string_begin, string_end, delims);
  }

  // Set the options for this tokenizer.  By default, this is 0.
  void set_options(int options) { options_ = options; }

  // Set the characters to regard as quotes.  By default, this is empty.  When
  // a quote char is encountered, the tokenizer will switch into a mode where
  // it ignores delimiters that it finds.  It switches out of this mode once it
  // finds another instance of the quote char.  If a backslash is encountered
  // within a quoted string, then the next character is skipped.
  void set_quote_chars(const str& quotes) { quotes_ = quotes; }

  // Call this method to advance the tokenizer to the next token.  This
  // returns false if the tokenizer is complete.  This method must be called
  // before the token* methods return anything meaningful; until then they
  // describe an empty, non-delimiter token at the start of the input.
  bool GetNext() {
    // Quote/escape tracking is local to one call: every token starts outside
    // of any quoted section.
    AdvanceState state;
    token_is_delim_ = false;
    for (;;) {
      token_begin_ = token_end_;
      if (token_end_ == end_)
        return false;
      ++token_end_;
      if (AdvanceOne(&state, *token_begin_))
        break;  // First character of a regular token.
      if (options_ & RETURN_DELIMS) {
        // Report the single delimiter character as its own token.
        token_is_delim_ = true;
        return true;
      }
      // else skip over delim
    }
    // Extend the token up to (but not including) the next unquoted delimiter
    // or the end of the input.
    while (token_end_ != end_ && AdvanceOne(&state, *token_end_))
      ++token_end_;
    return true;
  }

  // Returns true if token is a delimiter.  When the tokenizer is constructed
  // with the RETURN_DELIMS option, this method can be used to check if the
  // returned token is actually a delimiter.
  bool token_is_delim() const { return token_is_delim_; }

  // If GetNext() returned true, then these methods may be used to read the
  // value of the token.
  const_iterator token_begin() const { return token_begin_; }
  const_iterator token_end() const { return token_end_; }
  str token() const { return str(token_begin_, token_end_); }

 private:
  // Shared constructor logic.  Every member that the public accessors read is
  // given a defined value here so that calling token(), token_begin() or
  // token_is_delim() before the first GetNext() is harmless.
  void Init(const_iterator string_begin,
            const_iterator string_end,
            const str& delims) {
    token_begin_ = string_begin;
    token_end_ = string_begin;
    end_ = string_end;
    delims_ = delims;
    options_ = 0;
    token_is_delim_ = false;
  }

  // Returns true if |c| is one of the configured delimiter characters.
  bool IsDelim(char_type c) const {
    return delims_.find(c) != str::npos;
  }

  // Returns true if |c| is one of the configured quote characters.
  bool IsQuote(char_type c) const {
    return quotes_.find(c) != str::npos;
  }

  // Scanner state carried across the characters of a single token.
  struct AdvanceState {
    bool in_quote;         // Inside a quoted section; delimiters are ignored.
    bool in_escape;        // Previous char was a backslash inside a quote.
    char_type quote_char;  // The character that will close the open quote.
    AdvanceState() : in_quote(false), in_escape(false), quote_char() {}
  };

  // Feeds one character to the scanner.  Returns true if a delimiter was not
  // hit, i.e. |c| belongs to the current token.
  bool AdvanceOne(AdvanceState* state, char_type c) const {
    if (state->in_quote) {
      if (state->in_escape) {
        // The escaped character is consumed verbatim.
        state->in_escape = false;
      } else if (c == '\\') {
        state->in_escape = true;
      } else if (c == state->quote_char) {
        // Only the same character that opened the quote closes it.
        state->in_quote = false;
      }
    } else {
      if (IsDelim(c))
        return false;
      // Remember |c| as the potential closing quote; it only matters when
      // |c| really is a quote character.
      state->in_quote = IsQuote(state->quote_char = c);
    }
    return true;
  }

  const_iterator token_begin_;
  const_iterator token_end_;
  const_iterator end_;
  str delims_;
  str quotes_;
  int options_;
  bool token_is_delim_;
};

typedef StringTokenizerT<std::string, std::string::const_iterator>
    StringTokenizer;
typedef StringTokenizerT<std::wstring, std::wstring::const_iterator>
    WStringTokenizer;
typedef StringTokenizerT<std::string, const char*> CStringTokenizer;

#endif  // BASE_STRING_TOKENIZER_H_