ipc/chromium/src/base/string_tokenizer.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/ipc/chromium/src/base/string_tokenizer.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,202 @@
     1.4 +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
     1.5 +// Use of this source code is governed by a BSD-style license that can be
     1.6 +// found in the LICENSE file.
     1.7 +
     1.8 +#ifndef BASE_STRING_TOKENIZER_H_
     1.9 +#define BASE_STRING_TOKENIZER_H_
    1.10 +
    1.11 +#include <string>
    1.12 +
    1.13 +// StringTokenizerT is a simple string tokenizer class.  It works like an
    1.14 +// iterator that with each step (see the Advance method) updates members that
    1.15 +// refer to the next token in the input string.  The user may optionally
    1.16 +// configure the tokenizer to return delimiters.
    1.17 +//
    1.18 +//
    1.19 +// EXAMPLE 1:
    1.20 +//
    1.21 +//   StringTokenizer t("this is a test", " ");
    1.22 +//   while (t.GetNext()) {
    1.23 +//     printf("%s\n", t.token().c_str());
    1.24 +//   }
    1.25 +//
    1.26 +// Output:
    1.27 +//
    1.28 +//   this
    1.29 +//   is
    1.30 +//   a
    1.31 +//   test
    1.32 +//
    1.33 +//
    1.34 +// EXAMPLE 2:
    1.35 +//
    1.36 +//   StringTokenizer t("no-cache=\"foo, bar\", private", ", ");
    1.37 +//   t.set_quote_chars("\"");
    1.38 +//   while (t.GetNext()) {
    1.39 +//     printf("%s\n", t.token().c_str());
    1.40 +//   }
    1.41 +//
    1.42 +// Output:
    1.43 +//
    1.44 +//   no-cache="foo, bar"
    1.45 +//   private
    1.46 +//
    1.47 +//
    1.48 +// EXAMPLE 3:
    1.49 +//
    1.50 +//   bool next_is_option = false, next_is_value = false;
    1.51 +//   std::string input = "text/html; charset=UTF-8; foo=bar";
    1.52 +//   StringTokenizer t(input, "; =");
    1.53 +//   t.set_options(StringTokenizer::RETURN_DELIMS);
    1.54 +//   while (t.GetNext()) {
    1.55 +//     if (t.token_is_delim()) {
    1.56 +//       switch (*t.token_begin()) {
    1.57 +//         case ';':
    1.58 +//           next_is_option = true;
    1.59 +//           break;
    1.60 +//         case '=':
    1.61 +//           next_is_value = true;
    1.62 +//           break;
    1.63 +//       }
    1.64 +//     } else {
    1.65 +//       const char* label;
    1.66 +//       if (next_is_option) {
    1.67 +//         label = "option-name";
    1.68 +//         next_is_option = false;
    1.69 +//       } else if (next_is_value) {
    1.70 +//         label = "option-value";
    1.71 +//         next_is_value = false;
    1.72 +//       } else {
    1.73 +//         label = "mime-type";
    1.74 +//       }
    1.75 +//       printf("%s: %s\n", label, t.token().c_str());
    1.76 +//     }
    1.77 +//   }
    1.78 +//
    1.79 +//
    1.80 +template <class str, class const_iterator>
    1.81 +class StringTokenizerT {
    1.82 + public:
    1.83 +  typedef typename str::value_type char_type;
    1.84 +
    1.85 +  // Options that may be pass to set_options()
    1.86 +  enum {
    1.87 +    // Specifies the delimiters should be returned as tokens
    1.88 +    RETURN_DELIMS = 1 << 0,
    1.89 +  };
    1.90 +
    1.91 +  StringTokenizerT(const str& string,
    1.92 +                   const str& delims) {
    1.93 +    Init(string.begin(), string.end(), delims);
    1.94 +  }
    1.95 +
    1.96 +  StringTokenizerT(const_iterator string_begin,
    1.97 +                   const_iterator string_end,
    1.98 +                   const str& delims) {
    1.99 +    Init(string_begin, string_end, delims);
   1.100 +  }
   1.101 +
   1.102 +  // Set the options for this tokenizer.  By default, this is 0.
   1.103 +  void set_options(int options) { options_ = options; }
   1.104 +
   1.105 +  // Set the characters to regard as quotes.  By default, this is empty.  When
   1.106 +  // a quote char is encountered, the tokenizer will switch into a mode where
   1.107 +  // it ignores delimiters that it finds.  It switches out of this mode once it
   1.108 +  // finds another instance of the quote char.  If a backslash is encountered
   1.109 +  // within a quoted string, then the next character is skipped.
   1.110 +  void set_quote_chars(const str& quotes) { quotes_ = quotes; }
   1.111 +
   1.112 +  // Call this method to advance the tokenizer to the next delimiter.  This
   1.113 +  // returns false if the tokenizer is complete.  This method must be called
   1.114 +  // before calling any of the token* methods.
   1.115 +  bool GetNext() {
   1.116 +    AdvanceState state;
   1.117 +    token_is_delim_ = false;
   1.118 +    for (;;) {
   1.119 +      token_begin_ = token_end_;
   1.120 +      if (token_end_ == end_)
   1.121 +        return false;
   1.122 +      ++token_end_;
   1.123 +      if (AdvanceOne(&state, *token_begin_))
   1.124 +        break;
   1.125 +      if (options_ & RETURN_DELIMS) {
   1.126 +        token_is_delim_ = true;
   1.127 +        return true;
   1.128 +      }
   1.129 +      // else skip over delim
   1.130 +    }
   1.131 +    while (token_end_ != end_ && AdvanceOne(&state, *token_end_))
   1.132 +      ++token_end_;
   1.133 +    return true;
   1.134 +  }
   1.135 +
   1.136 +  // Returns true if token is a delimiter.  When the tokenizer is constructed
   1.137 +  // with the RETURN_DELIMS option, this method can be used to check if the
   1.138 +  // returned token is actually a delimiter.
   1.139 +  bool token_is_delim() const { return token_is_delim_; }
   1.140 +
   1.141 +  // If GetNext() returned true, then these methods may be used to read the
   1.142 +  // value of the token.
   1.143 +  const_iterator token_begin() const { return token_begin_; }
   1.144 +  const_iterator token_end() const { return token_end_; }
   1.145 +  str token() const { return str(token_begin_, token_end_); }
   1.146 +
   1.147 + private:
   1.148 +  void Init(const_iterator string_begin,
   1.149 +            const_iterator string_end,
   1.150 +            const str& delims) {
   1.151 +    token_end_ = string_begin;
   1.152 +    end_ = string_end;
   1.153 +    delims_ = delims;
   1.154 +    options_ = 0;
   1.155 +  }
   1.156 +
   1.157 +  bool IsDelim(char_type c) const {
   1.158 +    return delims_.find(c) != str::npos;
   1.159 +  }
   1.160 +
   1.161 +  bool IsQuote(char_type c) const {
   1.162 +    return quotes_.find(c) != str::npos;
   1.163 +  }
   1.164 +
   1.165 +  struct AdvanceState {
   1.166 +    bool in_quote;
   1.167 +    bool in_escape;
   1.168 +    char_type quote_char;
   1.169 +    AdvanceState() : in_quote(false), in_escape(false) {}
   1.170 +  };
   1.171 +
   1.172 +  // Returns true if a delimiter was not hit.
   1.173 +  bool AdvanceOne(AdvanceState* state, char_type c) {
   1.174 +    if (state->in_quote) {
   1.175 +      if (state->in_escape) {
   1.176 +        state->in_escape = false;
   1.177 +      } else if (c == '\\') {
   1.178 +        state->in_escape = true;
   1.179 +      } else if (c == state->quote_char) {
   1.180 +        state->in_quote = false;
   1.181 +      }
   1.182 +    } else {
   1.183 +      if (IsDelim(c))
   1.184 +        return false;
   1.185 +      state->in_quote = IsQuote(state->quote_char = c);
   1.186 +    }
   1.187 +    return true;
   1.188 +  }
   1.189 +
   1.190 +  const_iterator token_begin_;
   1.191 +  const_iterator token_end_;
   1.192 +  const_iterator end_;
   1.193 +  str delims_;
   1.194 +  str quotes_;
   1.195 +  int options_;
   1.196 +  bool token_is_delim_;
   1.197 +};
   1.198 +
   1.199 +typedef StringTokenizerT<std::string, std::string::const_iterator>
   1.200 +    StringTokenizer;  // Tokenizer over std::string input.
   1.201 +typedef StringTokenizerT<std::wstring, std::wstring::const_iterator>
   1.202 +    WStringTokenizer;  // Tokenizer over std::wstring input.
   1.203 +typedef StringTokenizerT<std::string, const char*> CStringTokenizer;  // Tokenizer over a const char* range.
   1.204 +
   1.205 +#endif  // BASE_STRING_TOKENIZER_H_

mercurial