parser/expat/lib/xmltok_impl.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/parser/expat/lib/xmltok_impl.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1779 @@
     1.4 +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
     1.5 +   See the file COPYING for copying permission.
     1.6 +*/
     1.7 +/* Default when IS_INVALID_CHAR is not already defined: no multi-byte sequence is rejected. */
     1.8 +#ifndef IS_INVALID_CHAR
     1.9 +#define IS_INVALID_CHAR(enc, ptr, n) (0)
    1.10 +#endif
    1.11 +/* Switch case for an n-byte lead byte; relies on `enc` and `end` being visible where it expands. */
    1.12 +#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    1.13 +    case BT_LEAD ## n: \
    1.14 +      if (end - ptr < n) /* sequence is cut off by the end of input */ \
    1.15 +        return XML_TOK_PARTIAL_CHAR; \
    1.16 +      if (IS_INVALID_CHAR(enc, ptr, n)) { \
    1.17 +        *(nextTokPtr) = (ptr); \
    1.18 +        return XML_TOK_INVALID; \
    1.19 +      } \
    1.20 +      ptr += n; \
    1.21 +      break;
    1.22 +/* 2-, 3- and 4-byte sequences are validated and skipped; BT_NONXML, BT_MALFORM and BT_TRAIL always yield XML_TOK_INVALID. */
    1.23 +#define INVALID_CASES(ptr, nextTokPtr) \
    1.24 +  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
    1.25 +  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
    1.26 +  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
    1.27 +  case BT_NONXML: \
    1.28 +  case BT_MALFORM: \
    1.29 +  case BT_TRAIL: \
    1.30 +    *(nextTokPtr) = (ptr); \
    1.31 +    return XML_TOK_INVALID;
    1.32 +/* Switch case for an n-byte sequence inside a name: skipped only if IS_NAME_CHAR accepts it. */
    1.33 +#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
    1.34 +   case BT_LEAD ## n: \
    1.35 +     if (end - ptr < n) \
    1.36 +       return XML_TOK_PARTIAL_CHAR; \
    1.37 +     if (!IS_NAME_CHAR(enc, ptr, n)) { \
    1.38 +       *nextTokPtr = ptr; \
    1.39 +       return XML_TOK_INVALID; \
    1.40 +     } \
    1.41 +     ptr += n; \
    1.42 +     break;
    1.43 +/* Cases that consume one name character (of MINBPC(enc) bytes or a multi-byte sequence) and break out of the switch. */
    1.44 +#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
    1.45 +  case BT_NONASCII: \
    1.46 +    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
    1.47 +      *nextTokPtr = ptr; \
    1.48 +      return XML_TOK_INVALID; \
    1.49 +    } /* fall through: an accepted BT_NONASCII character advances like the types below */ \
    1.50 +  case BT_NMSTRT: \
    1.51 +  case BT_HEX: \
    1.52 +  case BT_DIGIT: \
    1.53 +  case BT_NAME: \
    1.54 +  case BT_MINUS: \
    1.55 +    ptr += MINBPC(enc); \
    1.56 +    break; \
    1.57 +  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
    1.58 +  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
    1.59 +  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
    1.60 +/* Switch case for an n-byte sequence at the start of a name: skipped only if IS_NMSTRT_CHAR accepts it. */
    1.61 +#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
    1.62 +   case BT_LEAD ## n: \
    1.63 +     if (end - ptr < n) \
    1.64 +       return XML_TOK_PARTIAL_CHAR; \
    1.65 +     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
    1.66 +       *nextTokPtr = ptr; \
    1.67 +       return XML_TOK_INVALID; \
    1.68 +     } \
    1.69 +     ptr += n; \
    1.70 +     break;
    1.71 +/* Cases that consume one name-start character and break out of the switch; other byte types are left to the caller's own cases. */
    1.72 +#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
    1.73 +  case BT_NONASCII: \
    1.74 +    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
    1.75 +      *nextTokPtr = ptr; \
    1.76 +      return XML_TOK_INVALID; \
    1.77 +    } /* fall through: an accepted BT_NONASCII character advances like the types below */ \
    1.78 +  case BT_NMSTRT: \
    1.79 +  case BT_HEX: \
    1.80 +    ptr += MINBPC(enc); \
    1.81 +    break; \
    1.82 +  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
    1.83 +  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
    1.84 +  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
    1.85 +/* Default when PREFIX is not already defined: function names are used unchanged. */
    1.86 +#ifndef PREFIX
    1.87 +#define PREFIX(ident) ident
    1.88 +#endif
    1.89 +
    1.90 +/* ptr points to character following "<!-"; the second '-' of "<!--" must come next. */
    1.91 +/* Returns XML_TOK_COMMENT (*nextTokPtr just past "-->"), XML_TOK_INVALID (*nextTokPtr at the offending character), XML_TOK_PARTIAL_CHAR for a cut-off multi-byte sequence, or XML_TOK_PARTIAL when input runs out. */
    1.92 +static int PTRCALL
    1.93 +PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
    1.94 +                    const char *end, const char **nextTokPtr)
    1.95 +{
    1.96 +  if (end - ptr >= MINBPC(enc)) { /* require one whole character; a bare ptr != end test can overrun when end - ptr is not a multiple of MINBPC(enc) */
    1.97 +    if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
    1.98 +      *nextTokPtr = ptr;
    1.99 +      return XML_TOK_INVALID;
   1.100 +    }
   1.101 +    ptr += MINBPC(enc);
   1.102 +    while (end - ptr >= MINBPC(enc)) {
   1.103 +      switch (BYTE_TYPE(enc, ptr)) {
   1.104 +      INVALID_CASES(ptr, nextTokPtr)
   1.105 +      case BT_MINUS:
   1.106 +        if (end - (ptr += MINBPC(enc)) < MINBPC(enc))
   1.107 +          return XML_TOK_PARTIAL;
   1.108 +        if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
   1.109 +          if (end - (ptr += MINBPC(enc)) < MINBPC(enc))
   1.110 +            return XML_TOK_PARTIAL;
   1.111 +          if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { /* "--" inside a comment is only accepted as part of "-->" */
   1.112 +            *nextTokPtr = ptr;
   1.113 +            return XML_TOK_INVALID;
   1.114 +          }
   1.115 +          *nextTokPtr = ptr + MINBPC(enc);
   1.116 +          return XML_TOK_COMMENT;
   1.117 +        }
   1.118 +        break; /* single '-': the character after it is re-examined by the loop */
   1.119 +      default:
   1.120 +        ptr += MINBPC(enc);
   1.121 +        break;
   1.122 +      }
   1.123 +    }
   1.124 +  }
   1.125 +  return XML_TOK_PARTIAL;
   1.126 +}
   1.127 +
   1.128 +/* ptr points to character following "<!" */
   1.129 +/* Returns XML_TOK_DECL_OPEN (*nextTokPtr at the character ending the keyword), XML_TOK_COND_SECT_OPEN after '[', the result of scanComment after '-', XML_TOK_INVALID, or XML_TOK_PARTIAL when input runs out. */
   1.130 +static int PTRCALL
   1.131 +PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
   1.132 +                 const char *end, const char **nextTokPtr)
   1.133 +{
   1.134 +  if (end - ptr < MINBPC(enc)) /* length test instead of ptr == end: never read a truncated character */
   1.135 +    return XML_TOK_PARTIAL;
   1.136 +  switch (BYTE_TYPE(enc, ptr)) {
   1.137 +  case BT_MINUS:
   1.138 +    return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
   1.139 +  case BT_LSQB:
   1.140 +    *nextTokPtr = ptr + MINBPC(enc);
   1.141 +    return XML_TOK_COND_SECT_OPEN;
   1.142 +  case BT_NMSTRT:
   1.143 +  case BT_HEX:
   1.144 +    ptr += MINBPC(enc);
   1.145 +    break;
   1.146 +  default:
   1.147 +    *nextTokPtr = ptr;
   1.148 +    return XML_TOK_INVALID;
   1.149 +  }
   1.150 +  while (end - ptr >= MINBPC(enc)) {
   1.151 +    switch (BYTE_TYPE(enc, ptr)) {
   1.152 +    case BT_PERCNT:
   1.153 +      if (end - ptr < 2 * MINBPC(enc)) /* the lookahead below reads the character after '%', so two whole characters are required */
   1.154 +        return XML_TOK_PARTIAL;
   1.155 +      /* don't allow <!ENTITY% foo "whatever"> */
   1.156 +      switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
   1.157 +      case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
   1.158 +        *nextTokPtr = ptr;
   1.159 +        return XML_TOK_INVALID;
   1.160 +      }
   1.161 +      /* fall through */
   1.162 +    case BT_S: case BT_CR: case BT_LF:
   1.163 +      *nextTokPtr = ptr;
   1.164 +      return XML_TOK_DECL_OPEN;
   1.165 +    case BT_NMSTRT:
   1.166 +    case BT_HEX:
   1.167 +      ptr += MINBPC(enc);
   1.168 +      break;
   1.169 +    default:
   1.170 +      *nextTokPtr = ptr;
   1.171 +      return XML_TOK_INVALID;
   1.172 +    }
   1.173 +  }
   1.174 +  return XML_TOK_PARTIAL;
   1.175 +}
   1.176 +/* Classifies the PI target in [ptr, end): *tokPtr becomes XML_TOK_XML_DECL for exactly "xml", otherwise XML_TOK_PI; returns 0 only when the target spells "xml" with at least one upper-case letter, else 1. */
   1.177 +static int PTRCALL
   1.178 +PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
   1.179 +                      const char *end, int *tokPtr)
   1.180 +{
   1.181 +  int upper = 0; /* set once any of the three letters is upper case */
   1.182 +  *tokPtr = XML_TOK_PI;
   1.183 +  if (end - ptr != MINBPC(enc)*3) /* only a three-character target can be "xml" */
   1.184 +    return 1;
   1.185 +  switch (BYTE_TO_ASCII(enc, ptr)) {
   1.186 +  case ASCII_x:
   1.187 +    break;
   1.188 +  case ASCII_X:
   1.189 +    upper = 1;
   1.190 +    break;
   1.191 +  default:
   1.192 +    return 1;
   1.193 +  }
   1.194 +  ptr += MINBPC(enc);
   1.195 +  switch (BYTE_TO_ASCII(enc, ptr)) {
   1.196 +  case ASCII_m:
   1.197 +    break;
   1.198 +  case ASCII_M:
   1.199 +    upper = 1;
   1.200 +    break;
   1.201 +  default:
   1.202 +    return 1;
   1.203 +  }
   1.204 +  ptr += MINBPC(enc);
   1.205 +  switch (BYTE_TO_ASCII(enc, ptr)) {
   1.206 +  case ASCII_l:
   1.207 +    break;
   1.208 +  case ASCII_L:
   1.209 +    upper = 1;
   1.210 +    break;
   1.211 +  default:
   1.212 +    return 1;
   1.213 +  }
   1.214 +  if (upper) /* "XML", "Xml", ... : reported as failure to the caller */
   1.215 +    return 0;
   1.216 +  *tokPtr = XML_TOK_XML_DECL;
   1.217 +  return 1;
   1.218 +}
   1.219 +
   1.220 +/* ptr points to character following "<?" */
   1.221 +/* Returns the token chosen by checkPiTarget (*nextTokPtr just past "?>"), XML_TOK_INVALID, XML_TOK_PARTIAL_CHAR for a cut-off multi-byte sequence, or XML_TOK_PARTIAL when input runs out. */
   1.222 +static int PTRCALL
   1.223 +PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
   1.224 +               const char *end, const char **nextTokPtr)
   1.225 +{
   1.226 +  int tok;
   1.227 +  const char *target = ptr; /* start of the PI target name */
   1.228 +  if (end - ptr < MINBPC(enc)) /* length test instead of ptr == end: never read a truncated character */
   1.229 +    return XML_TOK_PARTIAL;
   1.230 +  switch (BYTE_TYPE(enc, ptr)) {
   1.231 +  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   1.232 +  default:
   1.233 +    *nextTokPtr = ptr;
   1.234 +    return XML_TOK_INVALID;
   1.235 +  }
   1.236 +  while (end - ptr >= MINBPC(enc)) {
   1.237 +    switch (BYTE_TYPE(enc, ptr)) {
   1.238 +    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
   1.239 +    case BT_S: case BT_CR: case BT_LF:
   1.240 +      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
   1.241 +        *nextTokPtr = ptr;
   1.242 +        return XML_TOK_INVALID;
   1.243 +      }
   1.244 +      ptr += MINBPC(enc);
   1.245 +      while (end - ptr >= MINBPC(enc)) { /* PI content: scan for the closing "?>" */
   1.246 +        switch (BYTE_TYPE(enc, ptr)) {
   1.247 +        INVALID_CASES(ptr, nextTokPtr)
   1.248 +        case BT_QUEST:
   1.249 +          ptr += MINBPC(enc);
   1.250 +          if (end - ptr < MINBPC(enc))
   1.251 +            return XML_TOK_PARTIAL;
   1.252 +          if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
   1.253 +            *nextTokPtr = ptr + MINBPC(enc);
   1.254 +            return tok;
   1.255 +          }
   1.256 +          break;
   1.257 +        default:
   1.258 +          ptr += MINBPC(enc);
   1.259 +          break;
   1.260 +        }
   1.261 +      }
   1.262 +      return XML_TOK_PARTIAL;
   1.263 +    case BT_QUEST: /* '?' directly after the target: only "?>" is acceptable */
   1.264 +      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
   1.265 +        *nextTokPtr = ptr;
   1.266 +        return XML_TOK_INVALID;
   1.267 +      }
   1.268 +      ptr += MINBPC(enc);
   1.269 +      if (end - ptr < MINBPC(enc))
   1.270 +        return XML_TOK_PARTIAL;
   1.271 +      if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
   1.272 +        *nextTokPtr = ptr + MINBPC(enc);
   1.273 +        return tok;
   1.274 +      }
   1.275 +      /* fall through */
   1.276 +    default:
   1.277 +      *nextTokPtr = ptr;
   1.278 +      return XML_TOK_INVALID;
   1.279 +    }
   1.280 +  }
   1.281 +  return XML_TOK_PARTIAL;
   1.282 +}
   1.283 +/* Matches the six characters "CDATA[" at ptr: returns XML_TOK_CDATA_SECT_OPEN with *nextTokPtr just past them, XML_TOK_INVALID at the first mismatch, or XML_TOK_PARTIAL if fewer than six characters remain. */
   1.284 +static int PTRCALL
   1.285 +PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
   1.286 +                         const char *end, const char **nextTokPtr)
   1.287 +{
   1.288 +  static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
   1.289 +                                     ASCII_T, ASCII_A, ASCII_LSQB };
   1.290 +  int i;
   1.291 +  /* CDATA[ */
   1.292 +  if (end - ptr < 6 * MINBPC(enc)) /* the whole keyword must be available before any of it is compared */
   1.293 +    return XML_TOK_PARTIAL;
   1.294 +  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
   1.295 +    if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
   1.296 +      *nextTokPtr = ptr;
   1.297 +      return XML_TOK_INVALID;
   1.298 +    }
   1.299 +  }
   1.300 +  *nextTokPtr = ptr;
   1.301 +  return XML_TOK_CDATA_SECT_OPEN;
   1.302 +}
   1.303 +/* Returns one token starting at ptr: XML_TOK_NONE for empty input, XML_TOK_CDATA_SECT_CLOSE for "]]>", XML_TOK_DATA_NEWLINE for CR, CR LF or LF, XML_TOK_INVALID for a rejected first character, otherwise XML_TOK_DATA_CHARS for a run of data. */
   1.304 +static int PTRCALL
   1.305 +PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
   1.306 +                        const char *end, const char **nextTokPtr)
   1.307 +{
   1.308 +  if (ptr == end)
   1.309 +    return XML_TOK_NONE;
   1.310 +  if (MINBPC(enc) > 1) { /* round the usable length down to whole MINBPC(enc) units; the mask arithmetic presumes MINBPC(enc) is a power of two */
   1.311 +    size_t n = end - ptr;
   1.312 +    if (n & (MINBPC(enc) - 1)) {
   1.313 +      n &= ~(MINBPC(enc) - 1);
   1.314 +      if (n == 0)
   1.315 +        return XML_TOK_PARTIAL;
   1.316 +      end = ptr + n;
   1.317 +    }
   1.318 +  }
   1.319 +  switch (BYTE_TYPE(enc, ptr)) {
   1.320 +  case BT_RSQB:
   1.321 +    ptr += MINBPC(enc);
   1.322 +    if (ptr == end)
   1.323 +      return XML_TOK_PARTIAL;
   1.324 +    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
   1.325 +      break;
   1.326 +    ptr += MINBPC(enc);
   1.327 +    if (ptr == end)
   1.328 +      return XML_TOK_PARTIAL;
   1.329 +    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
   1.330 +      ptr -= MINBPC(enc); /* "]]" without '>': back up so the second ']' ends this data run and starts the next token */
   1.331 +      break;
   1.332 +    }
   1.333 +    *nextTokPtr = ptr + MINBPC(enc);
   1.334 +    return XML_TOK_CDATA_SECT_CLOSE;
   1.335 +  case BT_CR:
   1.336 +    ptr += MINBPC(enc);
   1.337 +    if (ptr == end)
   1.338 +      return XML_TOK_PARTIAL;
   1.339 +    if (BYTE_TYPE(enc, ptr) == BT_LF) /* CR LF is reported as a single newline token */
   1.340 +      ptr += MINBPC(enc);
   1.341 +    *nextTokPtr = ptr;
   1.342 +    return XML_TOK_DATA_NEWLINE;
   1.343 +  case BT_LF:
   1.344 +    *nextTokPtr = ptr + MINBPC(enc);
   1.345 +    return XML_TOK_DATA_NEWLINE;
   1.346 +  INVALID_CASES(ptr, nextTokPtr)
   1.347 +  default:
   1.348 +    ptr += MINBPC(enc);
   1.349 +    break;
   1.350 +  }
   1.351 +  while (ptr != end) { /* extend the data run; anything special or invalid ends the run without being consumed */
   1.352 +    switch (BYTE_TYPE(enc, ptr)) {
   1.353 +#define LEAD_CASE(n) \
   1.354 +    case BT_LEAD ## n: \
   1.355 +      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
   1.356 +        *nextTokPtr = ptr; \
   1.357 +        return XML_TOK_DATA_CHARS; \
   1.358 +      } \
   1.359 +      ptr += n; \
   1.360 +      break;
   1.361 +    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
   1.362 +#undef LEAD_CASE
   1.363 +    case BT_NONXML:
   1.364 +    case BT_MALFORM:
   1.365 +    case BT_TRAIL:
   1.366 +    case BT_CR:
   1.367 +    case BT_LF:
   1.368 +    case BT_RSQB:
   1.369 +      *nextTokPtr = ptr;
   1.370 +      return XML_TOK_DATA_CHARS;
   1.371 +    default:
   1.372 +      ptr += MINBPC(enc);
   1.373 +      break;
   1.374 +    }
   1.375 +  }
   1.376 +  *nextTokPtr = ptr;
   1.377 +  return XML_TOK_DATA_CHARS;
   1.378 +}
   1.379 +
   1.380 +/* ptr points to character following "</" */
   1.381 +/* Returns XML_TOK_END_TAG (*nextTokPtr just past '>'), XML_TOK_INVALID, XML_TOK_PARTIAL_CHAR for a cut-off multi-byte sequence, or XML_TOK_PARTIAL when input runs out. */
   1.382 +static int PTRCALL
   1.383 +PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
   1.384 +                   const char *end, const char **nextTokPtr)
   1.385 +{
   1.386 +  if (end - ptr < MINBPC(enc)) /* length test instead of ptr == end: never read a truncated character */
   1.387 +    return XML_TOK_PARTIAL;
   1.388 +  switch (BYTE_TYPE(enc, ptr)) {
   1.389 +  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   1.390 +  default:
   1.391 +    *nextTokPtr = ptr;
   1.392 +    return XML_TOK_INVALID;
   1.393 +  }
   1.394 +  while (end - ptr >= MINBPC(enc)) {
   1.395 +    switch (BYTE_TYPE(enc, ptr)) {
   1.396 +    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
   1.397 +    case BT_S: case BT_CR: case BT_LF: /* after the name only whitespace and then '>' may follow */
   1.398 +      for (ptr += MINBPC(enc); end - ptr >= MINBPC(enc); ptr += MINBPC(enc)) {
   1.399 +        switch (BYTE_TYPE(enc, ptr)) {
   1.400 +        case BT_S: case BT_CR: case BT_LF:
   1.401 +          break;
   1.402 +        case BT_GT:
   1.403 +          *nextTokPtr = ptr + MINBPC(enc);
   1.404 +          return XML_TOK_END_TAG;
   1.405 +        default:
   1.406 +          *nextTokPtr = ptr;
   1.407 +          return XML_TOK_INVALID;
   1.408 +        }
   1.409 +      }
   1.410 +      return XML_TOK_PARTIAL;
   1.411 +#ifdef XML_NS
   1.412 +    case BT_COLON:
   1.413 +      /* no need to check qname syntax here, */
   1.414 +      /* since end-tag must match exactly */
   1.415 +      ptr += MINBPC(enc);
   1.416 +      break;
   1.417 +#endif
   1.418 +    case BT_GT:
   1.419 +      *nextTokPtr = ptr + MINBPC(enc);
   1.420 +      return XML_TOK_END_TAG;
   1.421 +    default:
   1.422 +      *nextTokPtr = ptr;
   1.423 +      return XML_TOK_INVALID;
   1.424 +    }
   1.425 +  }
   1.426 +  return XML_TOK_PARTIAL;
   1.427 +}
   1.428 +
   1.429 +/* ptr points to character following "&#X" */
   1.430 +/* Returns XML_TOK_CHAR_REF (*nextTokPtr just past ';'), XML_TOK_INVALID at the first character that is neither BT_DIGIT nor BT_HEX nor the terminating ';', or XML_TOK_PARTIAL when input runs out. */
   1.431 +static int PTRCALL
   1.432 +PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
   1.433 +                       const char *end, const char **nextTokPtr)
   1.434 +{
   1.435 +  if (end - ptr >= MINBPC(enc)) { /* require one whole character; a bare ptr != end test can overrun when end - ptr is not a multiple of MINBPC(enc) */
   1.436 +    switch (BYTE_TYPE(enc, ptr)) { /* at least one digit is mandatory */
   1.437 +    case BT_DIGIT:
   1.438 +    case BT_HEX:
   1.439 +      break;
   1.440 +    default:
   1.441 +      *nextTokPtr = ptr;
   1.442 +      return XML_TOK_INVALID;
   1.443 +    }
   1.444 +    for (ptr += MINBPC(enc); end - ptr >= MINBPC(enc); ptr += MINBPC(enc)) {
   1.445 +      switch (BYTE_TYPE(enc, ptr)) {
   1.446 +      case BT_DIGIT:
   1.447 +      case BT_HEX:
   1.448 +        break;
   1.449 +      case BT_SEMI:
   1.450 +        *nextTokPtr = ptr + MINBPC(enc);
   1.451 +        return XML_TOK_CHAR_REF;
   1.452 +      default:
   1.453 +        *nextTokPtr = ptr;
   1.454 +        return XML_TOK_INVALID;
   1.455 +      }
   1.456 +    }
   1.457 +  }
   1.458 +  return XML_TOK_PARTIAL;
   1.459 +}
   1.460 +
   1.461 +/* ptr points to character following "&#" */
   1.462 +/* Returns XML_TOK_CHAR_REF (*nextTokPtr just past ';'), XML_TOK_INVALID at the first unexpected character, or XML_TOK_PARTIAL when input runs out; a leading 'x' hands the rest to scanHexCharRef. */
   1.463 +static int PTRCALL
   1.464 +PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
   1.465 +                    const char *end, const char **nextTokPtr)
   1.466 +{
   1.467 +  if (end - ptr >= MINBPC(enc)) { /* require one whole character; a bare ptr != end test can overrun when end - ptr is not a multiple of MINBPC(enc) */
   1.468 +    if (CHAR_MATCHES(enc, ptr, ASCII_x))
   1.469 +      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
   1.470 +    switch (BYTE_TYPE(enc, ptr)) { /* at least one decimal digit is mandatory */
   1.471 +    case BT_DIGIT:
   1.472 +      break;
   1.473 +    default:
   1.474 +      *nextTokPtr = ptr;
   1.475 +      return XML_TOK_INVALID;
   1.476 +    }
   1.477 +    for (ptr += MINBPC(enc); end - ptr >= MINBPC(enc); ptr += MINBPC(enc)) {
   1.478 +      switch (BYTE_TYPE(enc, ptr)) {
   1.479 +      case BT_DIGIT:
   1.480 +        break;
   1.481 +      case BT_SEMI:
   1.482 +        *nextTokPtr = ptr + MINBPC(enc);
   1.483 +        return XML_TOK_CHAR_REF;
   1.484 +      default:
   1.485 +        *nextTokPtr = ptr;
   1.486 +        return XML_TOK_INVALID;
   1.487 +      }
   1.488 +    }
   1.489 +  }
   1.490 +  return XML_TOK_PARTIAL;
   1.491 +}
   1.492 +
   1.493 +/* ptr points to character following "&" */
   1.494 +/* Returns XML_TOK_ENTITY_REF (*nextTokPtr just past ';'), the result of scanCharRef after '#', XML_TOK_INVALID, XML_TOK_PARTIAL_CHAR for a cut-off multi-byte sequence, or XML_TOK_PARTIAL when input runs out. */
   1.495 +static int PTRCALL
   1.496 +PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
   1.497 +                const char **nextTokPtr)
   1.498 +{
   1.499 +  if (end - ptr < MINBPC(enc)) /* length test instead of ptr == end: never read a truncated character */
   1.500 +    return XML_TOK_PARTIAL;
   1.501 +  switch (BYTE_TYPE(enc, ptr)) {
   1.502 +  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   1.503 +  case BT_NUM:
   1.504 +    return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
   1.505 +  default:
   1.506 +    *nextTokPtr = ptr;
   1.507 +    return XML_TOK_INVALID;
   1.508 +  }
   1.509 +  while (end - ptr >= MINBPC(enc)) { /* rest of the entity name, up to ';' */
   1.510 +    switch (BYTE_TYPE(enc, ptr)) {
   1.511 +    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
   1.512 +    case BT_SEMI:
   1.513 +      *nextTokPtr = ptr + MINBPC(enc);
   1.514 +      return XML_TOK_ENTITY_REF;
   1.515 +    default:
   1.516 +      *nextTokPtr = ptr;
   1.517 +      return XML_TOK_INVALID;
   1.518 +    }
   1.519 +  }
   1.520 +  return XML_TOK_PARTIAL;
   1.521 +}
   1.522 +
   1.523 +/* ptr points to character following first character of attribute name */
   1.524 +/* Returns XML_TOK_START_TAG_WITH_ATTS or XML_TOK_EMPTY_ELEMENT_WITH_ATTS (*nextTokPtr just past '>'), XML_TOK_INVALID, a non-positive result passed on from scanRef, XML_TOK_PARTIAL_CHAR, or XML_TOK_PARTIAL when input runs out. */
   1.525 +static int PTRCALL
   1.526 +PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
   1.527 +                 const char **nextTokPtr)
   1.528 +{
   1.529 +#ifdef XML_NS
   1.530 +  int hadColon = 0; /* at most one ':' is accepted per attribute name */
   1.531 +#endif
   1.532 +  while (end - ptr >= MINBPC(enc)) { /* length test instead of ptr != end: never read a truncated character */
   1.533 +    switch (BYTE_TYPE(enc, ptr)) {
   1.534 +    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
   1.535 +#ifdef XML_NS
   1.536 +    case BT_COLON:
   1.537 +      if (hadColon) {
   1.538 +        *nextTokPtr = ptr;
   1.539 +        return XML_TOK_INVALID;
   1.540 +      }
   1.541 +      hadColon = 1;
   1.542 +      ptr += MINBPC(enc);
   1.543 +      if (end - ptr < MINBPC(enc))
   1.544 +        return XML_TOK_PARTIAL;
   1.545 +      switch (BYTE_TYPE(enc, ptr)) { /* the part after ':' must begin with a name-start character */
   1.546 +      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   1.547 +      default:
   1.548 +        *nextTokPtr = ptr;
   1.549 +        return XML_TOK_INVALID;
   1.550 +      }
   1.551 +      break;
   1.552 +#endif
   1.553 +    case BT_S: case BT_CR: case BT_LF:
   1.554 +      for (;;) { /* whitespace after the name: only more whitespace or '=' may follow */
   1.555 +        int t;
   1.556 +
   1.557 +        ptr += MINBPC(enc);
   1.558 +        if (end - ptr < MINBPC(enc))
   1.559 +          return XML_TOK_PARTIAL;
   1.560 +        t = BYTE_TYPE(enc, ptr);
   1.561 +        if (t == BT_EQUALS)
   1.562 +          break;
   1.563 +        switch (t) {
   1.564 +        case BT_S:
   1.565 +        case BT_LF:
   1.566 +        case BT_CR:
   1.567 +          break;
   1.568 +        default:
   1.569 +          *nextTokPtr = ptr;
   1.570 +          return XML_TOK_INVALID;
   1.571 +        }
   1.572 +      }
   1.573 +    /* fall through */
   1.574 +    case BT_EQUALS:
   1.575 +      {
   1.576 +        int open; /* byte type of the opening quote (BT_QUOT or BT_APOS) */
   1.577 +#ifdef XML_NS
   1.578 +        hadColon = 0;
   1.579 +#endif
   1.580 +        for (;;) { /* skip whitespace between '=' and the opening quote */
   1.581 +          ptr += MINBPC(enc);
   1.582 +          if (end - ptr < MINBPC(enc))
   1.583 +            return XML_TOK_PARTIAL;
   1.584 +          open = BYTE_TYPE(enc, ptr);
   1.585 +          if (open == BT_QUOT || open == BT_APOS)
   1.586 +            break;
   1.587 +          switch (open) {
   1.588 +          case BT_S:
   1.589 +          case BT_LF:
   1.590 +          case BT_CR:
   1.591 +            break;
   1.592 +          default:
   1.593 +            *nextTokPtr = ptr;
   1.594 +            return XML_TOK_INVALID;
   1.595 +          }
   1.596 +        }
   1.597 +        ptr += MINBPC(enc);
   1.598 +        /* in attribute value */
   1.599 +        for (;;) {
   1.600 +          int t;
   1.601 +          if (end - ptr < MINBPC(enc))
   1.602 +            return XML_TOK_PARTIAL;
   1.603 +          t = BYTE_TYPE(enc, ptr);
   1.604 +          if (t == open) /* matching closing quote */
   1.605 +            break;
   1.606 +          switch (t) {
   1.607 +          INVALID_CASES(ptr, nextTokPtr)
   1.608 +          case BT_AMP:
   1.609 +            {
   1.610 +              int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
   1.611 +              if (tok <= 0) { /* reference did not scan to completion: hand its result to the caller */
   1.612 +                if (tok == XML_TOK_INVALID)
   1.613 +                  *nextTokPtr = ptr;
   1.614 +                return tok;
   1.615 +              }
   1.616 +              break;
   1.617 +            }
   1.618 +          case BT_LT:
   1.619 +            *nextTokPtr = ptr;
   1.620 +            return XML_TOK_INVALID;
   1.621 +          default:
   1.622 +            ptr += MINBPC(enc);
   1.623 +            break;
   1.624 +          }
   1.625 +        }
   1.626 +        ptr += MINBPC(enc);
   1.627 +        if (end - ptr < MINBPC(enc))
   1.628 +          return XML_TOK_PARTIAL;
   1.629 +        switch (BYTE_TYPE(enc, ptr)) { /* the closing quote must be followed by whitespace, '/' or '>' */
   1.630 +        case BT_S:
   1.631 +        case BT_CR:
   1.632 +        case BT_LF:
   1.633 +          break;
   1.634 +        case BT_SOL:
   1.635 +          goto sol;
   1.636 +        case BT_GT:
   1.637 +          goto gt;
   1.638 +        default:
   1.639 +          *nextTokPtr = ptr;
   1.640 +          return XML_TOK_INVALID;
   1.641 +        }
   1.642 +        /* ptr points to the whitespace character following the closing quote */
   1.643 +        for (;;) {
   1.644 +          ptr += MINBPC(enc);
   1.645 +          if (end - ptr < MINBPC(enc))
   1.646 +            return XML_TOK_PARTIAL;
   1.647 +          switch (BYTE_TYPE(enc, ptr)) {
   1.648 +          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   1.649 +          case BT_S: case BT_CR: case BT_LF:
   1.650 +            continue;
   1.651 +          case BT_GT:
   1.652 +          gt:
   1.653 +            *nextTokPtr = ptr + MINBPC(enc);
   1.654 +            return XML_TOK_START_TAG_WITH_ATTS;
   1.655 +          case BT_SOL:
   1.656 +          sol:
   1.657 +            ptr += MINBPC(enc);
   1.658 +            if (end - ptr < MINBPC(enc))
   1.659 +              return XML_TOK_PARTIAL;
   1.660 +            if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
   1.661 +              *nextTokPtr = ptr;
   1.662 +              return XML_TOK_INVALID;
   1.663 +            }
   1.664 +            *nextTokPtr = ptr + MINBPC(enc);
   1.665 +            return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
   1.666 +          default:
   1.667 +            *nextTokPtr = ptr;
   1.668 +            return XML_TOK_INVALID;
   1.669 +          }
   1.670 +          break; /* reached only after a name-start character: the outer loop scans the next attribute name */
   1.671 +        }
   1.672 +        break;
   1.673 +      }
   1.674 +    default:
   1.675 +      *nextTokPtr = ptr;
   1.676 +      return XML_TOK_INVALID;
   1.677 +    }
   1.678 +  }
   1.679 +  return XML_TOK_PARTIAL;
   1.680 +}
   1.681 +
   1.682 +/* ptr points to character following "<" */
   1.683 +/* Dispatches on that character to scanComment / scanCdataSection ("<!"), scanPi ("<?") or scanEndTag ("</"); otherwise scans a start-tag and returns XML_TOK_START_TAG_NO_ATTS, XML_TOK_EMPTY_ELEMENT_NO_ATTS, the result of scanAtts, XML_TOK_INVALID, XML_TOK_PARTIAL_CHAR or XML_TOK_PARTIAL. */
   1.684 +static int PTRCALL
   1.685 +PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
   1.686 +               const char **nextTokPtr)
   1.687 +{
   1.688 +#ifdef XML_NS
   1.689 +  int hadColon; /* at most one ':' is accepted in the element name */
   1.690 +#endif
   1.691 +  if (end - ptr < MINBPC(enc)) /* length test instead of ptr == end: never read a truncated character */
   1.692 +    return XML_TOK_PARTIAL;
   1.693 +  switch (BYTE_TYPE(enc, ptr)) {
   1.694 +  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   1.695 +  case BT_EXCL:
   1.696 +    if (end - (ptr += MINBPC(enc)) < MINBPC(enc))
   1.697 +      return XML_TOK_PARTIAL;
   1.698 +    switch (BYTE_TYPE(enc, ptr)) {
   1.699 +    case BT_MINUS:
   1.700 +      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
   1.701 +    case BT_LSQB:
   1.702 +      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
   1.703 +                                      end, nextTokPtr);
   1.704 +    }
   1.705 +    *nextTokPtr = ptr;
   1.706 +    return XML_TOK_INVALID;
   1.707 +  case BT_QUEST:
   1.708 +    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
   1.709 +  case BT_SOL:
   1.710 +    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
   1.711 +  default:
   1.712 +    *nextTokPtr = ptr;
   1.713 +    return XML_TOK_INVALID;
   1.714 +  }
   1.715 +#ifdef XML_NS
   1.716 +  hadColon = 0;
   1.717 +#endif
   1.718 +  /* we have a start-tag */
   1.719 +  while (end - ptr >= MINBPC(enc)) {
   1.720 +    switch (BYTE_TYPE(enc, ptr)) {
   1.721 +    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
   1.722 +#ifdef XML_NS
   1.723 +    case BT_COLON:
   1.724 +      if (hadColon) {
   1.725 +        *nextTokPtr = ptr;
   1.726 +        return XML_TOK_INVALID;
   1.727 +      }
   1.728 +      hadColon = 1;
   1.729 +      ptr += MINBPC(enc);
   1.730 +      if (end - ptr < MINBPC(enc))
   1.731 +        return XML_TOK_PARTIAL;
   1.732 +      switch (BYTE_TYPE(enc, ptr)) { /* the part after ':' must begin with a name-start character */
   1.733 +      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   1.734 +      default:
   1.735 +        *nextTokPtr = ptr;
   1.736 +        return XML_TOK_INVALID;
   1.737 +      }
   1.738 +      break;
   1.739 +#endif
   1.740 +    case BT_S: case BT_CR: case BT_LF:
   1.741 +      {
   1.742 +        ptr += MINBPC(enc);
   1.743 +        while (end - ptr >= MINBPC(enc)) { /* whitespace after the element name: '>', "/>" or an attribute may follow */
   1.744 +          switch (BYTE_TYPE(enc, ptr)) {
   1.745 +          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   1.746 +          case BT_GT:
   1.747 +            goto gt;
   1.748 +          case BT_SOL:
   1.749 +            goto sol;
   1.750 +          case BT_S: case BT_CR: case BT_LF:
   1.751 +            ptr += MINBPC(enc);
   1.752 +            continue;
   1.753 +          default:
   1.754 +            *nextTokPtr = ptr;
   1.755 +            return XML_TOK_INVALID;
   1.756 +          }
   1.757 +          return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); /* reached only after a name-start character was consumed */
   1.758 +        }
   1.759 +        return XML_TOK_PARTIAL;
   1.760 +      }
   1.761 +    case BT_GT:
   1.762 +    gt:
   1.763 +      *nextTokPtr = ptr + MINBPC(enc);
   1.764 +      return XML_TOK_START_TAG_NO_ATTS;
   1.765 +    case BT_SOL:
   1.766 +    sol:
   1.767 +      ptr += MINBPC(enc);
   1.768 +      if (end - ptr < MINBPC(enc))
   1.769 +        return XML_TOK_PARTIAL;
   1.770 +      if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
   1.771 +        *nextTokPtr = ptr;
   1.772 +        return XML_TOK_INVALID;
   1.773 +      }
   1.774 +      *nextTokPtr = ptr + MINBPC(enc);
   1.775 +      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
   1.776 +    default:
   1.777 +      *nextTokPtr = ptr;
   1.778 +      return XML_TOK_INVALID;
   1.779 +    }
   1.780 +  }
   1.781 +  return XML_TOK_PARTIAL;
   1.782 +}
   1.783 +/* Returns one token starting at ptr: XML_TOK_NONE for empty input, the result of scanLt after '<' or scanRef after '&', XML_TOK_DATA_NEWLINE, XML_TOK_TRAILING_CR / XML_TOK_TRAILING_RSQB at end of input, XML_TOK_INVALID for "]]>" or a rejected character, otherwise XML_TOK_DATA_CHARS. */
   1.784 +static int PTRCALL
   1.785 +PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
   1.786 +                   const char **nextTokPtr)
   1.787 +{
   1.788 +  if (ptr == end)
   1.789 +    return XML_TOK_NONE;
   1.790 +  if (MINBPC(enc) > 1) { /* round the usable length down to whole MINBPC(enc) units; the mask arithmetic presumes MINBPC(enc) is a power of two */
   1.791 +    size_t n = end - ptr;
   1.792 +    if (n & (MINBPC(enc) - 1)) {
   1.793 +      n &= ~(MINBPC(enc) - 1);
   1.794 +      if (n == 0)
   1.795 +        return XML_TOK_PARTIAL;
   1.796 +      end = ptr + n;
   1.797 +    }
   1.798 +  }
   1.799 +  switch (BYTE_TYPE(enc, ptr)) {
   1.800 +  case BT_LT:
   1.801 +    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
   1.802 +  case BT_AMP:
   1.803 +    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
   1.804 +  case BT_CR:
   1.805 +    ptr += MINBPC(enc);
   1.806 +    if (ptr == end)
   1.807 +      return XML_TOK_TRAILING_CR;
   1.808 +    if (BYTE_TYPE(enc, ptr) == BT_LF) /* CR LF is reported as a single newline token */
   1.809 +      ptr += MINBPC(enc);
   1.810 +    *nextTokPtr = ptr;
   1.811 +    return XML_TOK_DATA_NEWLINE;
   1.812 +  case BT_LF:
   1.813 +    *nextTokPtr = ptr + MINBPC(enc);
   1.814 +    return XML_TOK_DATA_NEWLINE;
   1.815 +  case BT_RSQB:
   1.816 +    ptr += MINBPC(enc);
   1.817 +    if (ptr == end)
   1.818 +      return XML_TOK_TRAILING_RSQB;
   1.819 +    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
   1.820 +      break;
   1.821 +    ptr += MINBPC(enc);
   1.822 +    if (ptr == end)
   1.823 +      return XML_TOK_TRAILING_RSQB;
   1.824 +    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
   1.825 +      ptr -= MINBPC(enc); /* "]]" without '>': back up so the second ']' is examined again by the loop below */
   1.826 +      break;
   1.827 +    }
   1.828 +    *nextTokPtr = ptr; /* "]]>" at the start of the token is rejected; *nextTokPtr is left on the '>' */
   1.829 +    return XML_TOK_INVALID;
   1.830 +  INVALID_CASES(ptr, nextTokPtr)
   1.831 +  default:
   1.832 +    ptr += MINBPC(enc);
   1.833 +    break;
   1.834 +  }
   1.835 +  while (ptr != end) { /* extend the data run; anything special or invalid ends the run without being consumed */
   1.836 +    switch (BYTE_TYPE(enc, ptr)) {
   1.837 +#define LEAD_CASE(n) \
   1.838 +    case BT_LEAD ## n: \
   1.839 +      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
   1.840 +        *nextTokPtr = ptr; \
   1.841 +        return XML_TOK_DATA_CHARS; \
   1.842 +      } \
   1.843 +      ptr += n; \
   1.844 +      break;
   1.845 +    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
   1.846 +#undef LEAD_CASE
   1.847 +    case BT_RSQB:
   1.848 +      if (ptr + MINBPC(enc) != end) { /* look ahead for "]]>" when enough input is available */
   1.849 +         if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
   1.850 +           ptr += MINBPC(enc);
   1.851 +           break;
   1.852 +         }
   1.853 +         if (ptr + 2*MINBPC(enc) != end) {
   1.854 +           if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
   1.855 +             ptr += MINBPC(enc);
   1.856 +             break;
   1.857 +           }
   1.858 +           *nextTokPtr = ptr + 2*MINBPC(enc); /* "]]>" inside the run: reported as invalid at the '>' */
   1.859 +           return XML_TOK_INVALID;
   1.860 +         }
   1.861 +      }
   1.862 +      /* fall through */
   1.863 +    case BT_AMP:
   1.864 +    case BT_LT:
   1.865 +    case BT_NONXML:
   1.866 +    case BT_MALFORM:
   1.867 +    case BT_TRAIL:
   1.868 +    case BT_CR:
   1.869 +    case BT_LF:
   1.870 +      *nextTokPtr = ptr;
   1.871 +      return XML_TOK_DATA_CHARS;
   1.872 +    default:
   1.873 +      ptr += MINBPC(enc);
   1.874 +      break;
   1.875 +    }
   1.876 +  }
   1.877 +  *nextTokPtr = ptr;
   1.878 +  return XML_TOK_DATA_CHARS;
   1.879 +}
   1.880 +
   1.881 +/* ptr points to character following "%" */
   1.882 +/* Returns XML_TOK_PARAM_ENTITY_REF (*nextTokPtr just past ';'), XML_TOK_PERCENT when whitespace or another '%' follows, -XML_TOK_PERCENT when nothing follows, XML_TOK_INVALID, XML_TOK_PARTIAL_CHAR, or XML_TOK_PARTIAL if input ends inside the name. */
   1.883 +static int PTRCALL
   1.884 +PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
   1.885 +                    const char **nextTokPtr)
   1.886 +{
   1.887 +  if (ptr == end) /* NOTE(review): equality tests here presume end - ptr is a multiple of MINBPC(enc); this function does not establish that itself - confirm callers do */
   1.888 +    return -XML_TOK_PERCENT;
   1.889 +  switch (BYTE_TYPE(enc, ptr)) {
   1.890 +  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   1.891 +  case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
   1.892 +    *nextTokPtr = ptr; /* lone '%': the following character is not consumed */
   1.893 +    return XML_TOK_PERCENT;
   1.894 +  default:
   1.895 +    *nextTokPtr = ptr;
   1.896 +    return XML_TOK_INVALID;
   1.897 +  }
   1.898 +  while (ptr != end) { /* rest of the parameter-entity name, up to ';' */
   1.899 +    switch (BYTE_TYPE(enc, ptr)) {
   1.900 +    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
   1.901 +    case BT_SEMI:
   1.902 +      *nextTokPtr = ptr + MINBPC(enc);
   1.903 +      return XML_TOK_PARAM_ENTITY_REF;
   1.904 +    default:
   1.905 +      *nextTokPtr = ptr;
   1.906 +      return XML_TOK_INVALID;
   1.907 +    }
   1.908 +  }
   1.909 +  return XML_TOK_PARTIAL;
   1.910 +}
   1.911 +
   1.912 +static int PTRCALL
   1.913 +PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
   1.914 +                      const char **nextTokPtr)
   1.915 +{
   1.916 +  if (ptr == end)
   1.917 +    return XML_TOK_PARTIAL;
   1.918 +  switch (BYTE_TYPE(enc, ptr)) {
   1.919 +  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   1.920 +  default:
   1.921 +    *nextTokPtr = ptr;
   1.922 +    return XML_TOK_INVALID;
   1.923 +  }
   1.924 +  while (ptr != end) {
   1.925 +    switch (BYTE_TYPE(enc, ptr)) {
   1.926 +    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
   1.927 +    case BT_CR: case BT_LF: case BT_S:
   1.928 +    case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
   1.929 +      *nextTokPtr = ptr;
   1.930 +      return XML_TOK_POUND_NAME;
   1.931 +    default:
   1.932 +      *nextTokPtr = ptr;
   1.933 +      return XML_TOK_INVALID;
   1.934 +    }
   1.935 +  }
   1.936 +  return -XML_TOK_POUND_NAME;
   1.937 +}
   1.938 +
   1.939 +static int PTRCALL
   1.940 +PREFIX(scanLit)(int open, const ENCODING *enc,
   1.941 +                const char *ptr, const char *end,
   1.942 +                const char **nextTokPtr)
   1.943 +{
   1.944 +  while (ptr != end) {
   1.945 +    int t = BYTE_TYPE(enc, ptr);
   1.946 +    switch (t) {
   1.947 +    INVALID_CASES(ptr, nextTokPtr)
   1.948 +    case BT_QUOT:
   1.949 +    case BT_APOS:
   1.950 +      ptr += MINBPC(enc);
   1.951 +      if (t != open)
   1.952 +        break;
   1.953 +      if (ptr == end)
   1.954 +        return -XML_TOK_LITERAL;
   1.955 +      *nextTokPtr = ptr;
   1.956 +      switch (BYTE_TYPE(enc, ptr)) {
   1.957 +      case BT_S: case BT_CR: case BT_LF:
   1.958 +      case BT_GT: case BT_PERCNT: case BT_LSQB:
   1.959 +        return XML_TOK_LITERAL;
   1.960 +      default:
   1.961 +        return XML_TOK_INVALID;
   1.962 +      }
   1.963 +    default:
   1.964 +      ptr += MINBPC(enc);
   1.965 +      break;
   1.966 +    }
   1.967 +  }
   1.968 +  return XML_TOK_PARTIAL;
   1.969 +}
   1.970 +
/* Scan one prolog token from [ptr, end).

   The byte type of the first character selects the token:
     - BT_QUOT / BT_APOS: handed to scanLit;
     - BT_LT: "<!" goes to scanDecl, "<?" to scanPi; when a byte that can
       begin a name follows, XML_TOK_INSTANCE_START is returned with
       *nextTokPtr set back on the BT_LT character;
     - BT_CR / BT_S / BT_LF: the whole run is one XML_TOK_PROLOG_S;
     - BT_PERCNT / BT_NUM: handed to scanPercent / scanPoundName;
     - brackets, parentheses, comma, vertical bar, BT_GT: the matching
       punctuation token;
     - a name or name-token character: the token is scanned to its end
       and returned as XML_TOK_NAME, XML_TOK_NMTOKEN, (with XML_NS)
       XML_TOK_PREFIXED_NAME, or as XML_TOK_NAME_PLUS /
       XML_TOK_NAME_ASTERISK / XML_TOK_NAME_QUESTION when directly
       followed by the corresponding occurrence indicator.

   Returns XML_TOK_NONE when ptr == end, XML_TOK_PARTIAL (or
   XML_TOK_PARTIAL_CHAR) when more input is needed, and XML_TOK_INVALID
   with *nextTokPtr on the offending character.  A negated token code is
   returned when the input ends at a point where the token might still
   continue (see the CR case below).
   NOTE(review): how callers interpret the negated codes is not visible
   in this part of the file.
*/
   1.971 +static int PTRCALL
   1.972 +PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
   1.973 +                  const char **nextTokPtr)
   1.974 +{
   1.975 +  int tok;
   1.976 +  if (ptr == end)
   1.977 +    return XML_TOK_NONE;
  /* For multi-byte-unit encodings, drop trailing bytes that do not make
     up a whole unit; if not even one whole unit remains, ask for more
     input.  The mask arithmetic presumably relies on MINBPC(enc) being a
     power of two -- TODO confirm. */
   1.978 +  if (MINBPC(enc) > 1) {
   1.979 +    size_t n = end - ptr;
   1.980 +    if (n & (MINBPC(enc) - 1)) {
   1.981 +      n &= ~(MINBPC(enc) - 1);
   1.982 +      if (n == 0)
   1.983 +        return XML_TOK_PARTIAL;
   1.984 +      end = ptr + n;
   1.985 +    }
   1.986 +  }
   1.987 +  switch (BYTE_TYPE(enc, ptr)) {
   1.988 +  case BT_QUOT:
   1.989 +    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
   1.990 +  case BT_APOS:
   1.991 +    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
   1.992 +  case BT_LT:
   1.993 +    {
   1.994 +      ptr += MINBPC(enc);
   1.995 +      if (ptr == end)
  1.996 +        return XML_TOK_PARTIAL;
  1.997 +      switch (BYTE_TYPE(enc, ptr)) {
  1.998 +      case BT_EXCL:
  1.999 +        return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1.1000 +      case BT_QUEST:
  1.1001 +        return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
      /* A byte that can begin a name: report the start of the instance
         and hand back the position of the BT_LT character itself. */
  1.1002 +      case BT_NMSTRT:
  1.1003 +      case BT_HEX:
  1.1004 +      case BT_NONASCII:
  1.1005 +      case BT_LEAD2:
  1.1006 +      case BT_LEAD3:
  1.1007 +      case BT_LEAD4:
  1.1008 +        *nextTokPtr = ptr - MINBPC(enc);
  1.1009 +        return XML_TOK_INSTANCE_START;
  1.1010 +      }
  1.1011 +      *nextTokPtr = ptr;
  1.1012 +      return XML_TOK_INVALID;
  1.1013 +    }
  1.1014 +  case BT_CR:
  1.1015 +    if (ptr + MINBPC(enc) == end) {
  1.1016 +      *nextTokPtr = end;
  1.1017 +      /* indicate that this might be part of a CR/LF pair */
  1.1018 +      return -XML_TOK_PROLOG_S;
  1.1019 +    }
  1.1020 +    /* fall through */
  1.1021 +  case BT_S: case BT_LF:
    /* Swallow the whole whitespace run.  A CR that is the last available
       character is left unconsumed so that a CR/LF pair is not split. */
  1.1022 +    for (;;) {
  1.1023 +      ptr += MINBPC(enc);
  1.1024 +      if (ptr == end)
  1.1025 +        break;
  1.1026 +      switch (BYTE_TYPE(enc, ptr)) {
  1.1027 +      case BT_S: case BT_LF:
  1.1028 +        break;
  1.1029 +      case BT_CR:
  1.1030 +        /* don't split CR/LF pair */
  1.1031 +        if (ptr + MINBPC(enc) != end)
  1.1032 +          break;
  1.1033 +        /* fall through */
  1.1034 +      default:
  1.1035 +        *nextTokPtr = ptr;
  1.1036 +        return XML_TOK_PROLOG_S;
  1.1037 +      }
  1.1038 +    }
  1.1039 +    *nextTokPtr = ptr;
  1.1040 +    return XML_TOK_PROLOG_S;
  1.1041 +  case BT_PERCNT:
  1.1042 +    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1.1043 +  case BT_COMMA:
  1.1044 +    *nextTokPtr = ptr + MINBPC(enc);
  1.1045 +    return XML_TOK_COMMA;
  1.1046 +  case BT_LSQB:
  1.1047 +    *nextTokPtr = ptr + MINBPC(enc);
  1.1048 +    return XML_TOK_OPEN_BRACKET;
  1.1049 +  case BT_RSQB:
    /* "]" alone is XML_TOK_CLOSE_BRACKET; "]]>" is
       XML_TOK_COND_SECT_CLOSE.  With "]]" and nothing after it the
       decision needs more input. */
  1.1050 +    ptr += MINBPC(enc);
  1.1051 +    if (ptr == end)
  1.1052 +      return -XML_TOK_CLOSE_BRACKET;
  1.1053 +    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
  1.1054 +      if (ptr + MINBPC(enc) == end)
  1.1055 +        return XML_TOK_PARTIAL;
  1.1056 +      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
  1.1057 +        *nextTokPtr = ptr + 2*MINBPC(enc);
  1.1058 +        return XML_TOK_COND_SECT_CLOSE;
  1.1059 +      }
  1.1060 +    }
  1.1061 +    *nextTokPtr = ptr;
  1.1062 +    return XML_TOK_CLOSE_BRACKET;
  1.1063 +  case BT_LPAR:
  1.1064 +    *nextTokPtr = ptr + MINBPC(enc);
  1.1065 +    return XML_TOK_OPEN_PAREN;
  1.1066 +  case BT_RPAR:
    /* ")" may carry an occurrence indicator (* ? +), which is folded
       into the token; otherwise only a fixed set of followers is
       accepted and the follower is not consumed. */
  1.1067 +    ptr += MINBPC(enc);
  1.1068 +    if (ptr == end)
  1.1069 +      return -XML_TOK_CLOSE_PAREN;
  1.1070 +    switch (BYTE_TYPE(enc, ptr)) {
  1.1071 +    case BT_AST:
  1.1072 +      *nextTokPtr = ptr + MINBPC(enc);
  1.1073 +      return XML_TOK_CLOSE_PAREN_ASTERISK;
  1.1074 +    case BT_QUEST:
  1.1075 +      *nextTokPtr = ptr + MINBPC(enc);
  1.1076 +      return XML_TOK_CLOSE_PAREN_QUESTION;
  1.1077 +    case BT_PLUS:
  1.1078 +      *nextTokPtr = ptr + MINBPC(enc);
  1.1079 +      return XML_TOK_CLOSE_PAREN_PLUS;
  1.1080 +    case BT_CR: case BT_LF: case BT_S:
  1.1081 +    case BT_GT: case BT_COMMA: case BT_VERBAR:
  1.1082 +    case BT_RPAR:
  1.1083 +      *nextTokPtr = ptr;
  1.1084 +      return XML_TOK_CLOSE_PAREN;
  1.1085 +    }
  1.1086 +    *nextTokPtr = ptr;
  1.1087 +    return XML_TOK_INVALID;
  1.1088 +  case BT_VERBAR:
  1.1089 +    *nextTokPtr = ptr + MINBPC(enc);
  1.1090 +    return XML_TOK_OR;
  1.1091 +  case BT_GT:
  1.1092 +    *nextTokPtr = ptr + MINBPC(enc);
  1.1093 +    return XML_TOK_DECL_CLOSE;
  1.1094 +  case BT_NUM:
  1.1095 +    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  /* Multi-byte lead bytes: need all n bytes; a name-start character
     begins an XML_TOK_NAME, any other name character an
     XML_TOK_NMTOKEN, anything else is invalid. */
  1.1096 +#define LEAD_CASE(n) \
  1.1097 +  case BT_LEAD ## n: \
  1.1098 +    if (end - ptr < n) \
  1.1099 +      return XML_TOK_PARTIAL_CHAR; \
  1.1100 +    if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
  1.1101 +      ptr += n; \
  1.1102 +      tok = XML_TOK_NAME; \
  1.1103 +      break; \
  1.1104 +    } \
  1.1105 +    if (IS_NAME_CHAR(enc, ptr, n)) { \
  1.1106 +      ptr += n; \
  1.1107 +      tok = XML_TOK_NMTOKEN; \
  1.1108 +      break; \
  1.1109 +    } \
  1.1110 +    *nextTokPtr = ptr; \
  1.1111 +    return XML_TOK_INVALID;
  1.1112 +    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1.1113 +#undef LEAD_CASE
  1.1114 +  case BT_NMSTRT:
  1.1115 +  case BT_HEX:
  1.1116 +    tok = XML_TOK_NAME;
  1.1117 +    ptr += MINBPC(enc);
  1.1118 +    break;
  1.1119 +  case BT_DIGIT:
  1.1120 +  case BT_NAME:
  1.1121 +  case BT_MINUS:
  1.1122 +#ifdef XML_NS
  1.1123 +  case BT_COLON:
  1.1124 +#endif
  1.1125 +    tok = XML_TOK_NMTOKEN;
  1.1126 +    ptr += MINBPC(enc);
  1.1127 +    break;
  1.1128 +  case BT_NONASCII:
  1.1129 +    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
  1.1130 +      ptr += MINBPC(enc);
  1.1131 +      tok = XML_TOK_NAME;
  1.1132 +      break;
  1.1133 +    }
  1.1134 +    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
  1.1135 +      ptr += MINBPC(enc);
  1.1136 +      tok = XML_TOK_NMTOKEN;
  1.1137 +      break;
  1.1138 +    }
  1.1139 +    /* fall through */
  1.1140 +  default:
  1.1141 +    *nextTokPtr = ptr;
  1.1142 +    return XML_TOK_INVALID;
  1.1143 +  }
  /* Only the name / name-token cases reach this point, with tok set and
     ptr past the first character.  Scan the remainder of the token. */
  1.1144 +  while (ptr != end) {
  1.1145 +    switch (BYTE_TYPE(enc, ptr)) {
  1.1146 +    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  1.1147 +    case BT_GT: case BT_RPAR: case BT_COMMA:
  1.1148 +    case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
  1.1149 +    case BT_S: case BT_CR: case BT_LF:
  1.1150 +      *nextTokPtr = ptr;
  1.1151 +      return tok;
  1.1152 +#ifdef XML_NS
    /* First colon in a name: becomes XML_TOK_PREFIXED_NAME if a name
       character follows, otherwise the token degrades to
       XML_TOK_NMTOKEN.  A second colon also degrades it. */
  1.1153 +    case BT_COLON:
  1.1154 +      ptr += MINBPC(enc);
  1.1155 +      switch (tok) {
  1.1156 +      case XML_TOK_NAME:
  1.1157 +        if (ptr == end)
  1.1158 +          return XML_TOK_PARTIAL;
  1.1159 +        tok = XML_TOK_PREFIXED_NAME;
  1.1160 +        switch (BYTE_TYPE(enc, ptr)) {
  1.1161 +        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  1.1162 +        default:
  1.1163 +          tok = XML_TOK_NMTOKEN;
  1.1164 +          break;
  1.1165 +        }
  1.1166 +        break;
  1.1167 +      case XML_TOK_PREFIXED_NAME:
  1.1168 +        tok = XML_TOK_NMTOKEN;
  1.1169 +        break;
  1.1170 +      }
  1.1171 +      break;
  1.1172 +#endif
    /* An occurrence indicator may directly follow anything except an
       XML_TOK_NMTOKEN; it is consumed as part of the token. */
  1.1173 +    case BT_PLUS:
  1.1174 +      if (tok == XML_TOK_NMTOKEN)  {
  1.1175 +        *nextTokPtr = ptr;
  1.1176 +        return XML_TOK_INVALID;
  1.1177 +      }
  1.1178 +      *nextTokPtr = ptr + MINBPC(enc);
  1.1179 +      return XML_TOK_NAME_PLUS;
  1.1180 +    case BT_AST:
  1.1181 +      if (tok == XML_TOK_NMTOKEN)  {
  1.1182 +        *nextTokPtr = ptr;
  1.1183 +        return XML_TOK_INVALID;
  1.1184 +      }
  1.1185 +      *nextTokPtr = ptr + MINBPC(enc);
  1.1186 +      return XML_TOK_NAME_ASTERISK;
  1.1187 +    case BT_QUEST:
  1.1188 +      if (tok == XML_TOK_NMTOKEN)  {
  1.1189 +        *nextTokPtr = ptr;
  1.1190 +        return XML_TOK_INVALID;
  1.1191 +      }
  1.1192 +      *nextTokPtr = ptr + MINBPC(enc);
  1.1193 +      return XML_TOK_NAME_QUESTION;
  1.1194 +    default:
  1.1195 +      *nextTokPtr = ptr;
  1.1196 +      return XML_TOK_INVALID;
  1.1197 +    }
  1.1198 +  }
  /* Input ended inside the name: report the token negated. */
  1.1199 +  return -tok;
  1.1200 +}
  1.1201 +
  1.1202 +static int PTRCALL
  1.1203 +PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
  1.1204 +                          const char *end, const char **nextTokPtr)
  1.1205 +{
  1.1206 +  const char *start;
  1.1207 +  if (ptr == end)
  1.1208 +    return XML_TOK_NONE;
  1.1209 +  start = ptr;
  1.1210 +  while (ptr != end) {
  1.1211 +    switch (BYTE_TYPE(enc, ptr)) {
  1.1212 +#define LEAD_CASE(n) \
  1.1213 +    case BT_LEAD ## n: ptr += n; break;
  1.1214 +    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1.1215 +#undef LEAD_CASE
  1.1216 +    case BT_AMP:
  1.1217 +      if (ptr == start)
  1.1218 +        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1.1219 +      *nextTokPtr = ptr;
  1.1220 +      return XML_TOK_DATA_CHARS;
  1.1221 +    case BT_LT:
  1.1222 +      /* this is for inside entity references */
  1.1223 +      *nextTokPtr = ptr;
  1.1224 +      return XML_TOK_INVALID;
  1.1225 +    case BT_LF:
  1.1226 +      if (ptr == start) {
  1.1227 +        *nextTokPtr = ptr + MINBPC(enc);
  1.1228 +        return XML_TOK_DATA_NEWLINE;
  1.1229 +      }
  1.1230 +      *nextTokPtr = ptr;
  1.1231 +      return XML_TOK_DATA_CHARS;
  1.1232 +    case BT_CR:
  1.1233 +      if (ptr == start) {
  1.1234 +        ptr += MINBPC(enc);
  1.1235 +        if (ptr == end)
  1.1236 +          return XML_TOK_TRAILING_CR;
  1.1237 +        if (BYTE_TYPE(enc, ptr) == BT_LF)
  1.1238 +          ptr += MINBPC(enc);
  1.1239 +        *nextTokPtr = ptr;
  1.1240 +        return XML_TOK_DATA_NEWLINE;
  1.1241 +      }
  1.1242 +      *nextTokPtr = ptr;
  1.1243 +      return XML_TOK_DATA_CHARS;
  1.1244 +    case BT_S:
  1.1245 +      if (ptr == start) {
  1.1246 +        *nextTokPtr = ptr + MINBPC(enc);
  1.1247 +        return XML_TOK_ATTRIBUTE_VALUE_S;
  1.1248 +      }
  1.1249 +      *nextTokPtr = ptr;
  1.1250 +      return XML_TOK_DATA_CHARS;
  1.1251 +    default:
  1.1252 +      ptr += MINBPC(enc);
  1.1253 +      break;
  1.1254 +    }
  1.1255 +  }
  1.1256 +  *nextTokPtr = ptr;
  1.1257 +  return XML_TOK_DATA_CHARS;
  1.1258 +}
  1.1259 +
  1.1260 +static int PTRCALL
  1.1261 +PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
  1.1262 +                       const char *end, const char **nextTokPtr)
  1.1263 +{
  1.1264 +  const char *start;
  1.1265 +  if (ptr == end)
  1.1266 +    return XML_TOK_NONE;
  1.1267 +  start = ptr;
  1.1268 +  while (ptr != end) {
  1.1269 +    switch (BYTE_TYPE(enc, ptr)) {
  1.1270 +#define LEAD_CASE(n) \
  1.1271 +    case BT_LEAD ## n: ptr += n; break;
  1.1272 +    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1.1273 +#undef LEAD_CASE
  1.1274 +    case BT_AMP:
  1.1275 +      if (ptr == start)
  1.1276 +        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1.1277 +      *nextTokPtr = ptr;
  1.1278 +      return XML_TOK_DATA_CHARS;
  1.1279 +    case BT_PERCNT:
  1.1280 +      if (ptr == start) {
  1.1281 +        int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
  1.1282 +                                       end, nextTokPtr);
  1.1283 +        return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
  1.1284 +      }
  1.1285 +      *nextTokPtr = ptr;
  1.1286 +      return XML_TOK_DATA_CHARS;
  1.1287 +    case BT_LF:
  1.1288 +      if (ptr == start) {
  1.1289 +        *nextTokPtr = ptr + MINBPC(enc);
  1.1290 +        return XML_TOK_DATA_NEWLINE;
  1.1291 +      }
  1.1292 +      *nextTokPtr = ptr;
  1.1293 +      return XML_TOK_DATA_CHARS;
  1.1294 +    case BT_CR:
  1.1295 +      if (ptr == start) {
  1.1296 +        ptr += MINBPC(enc);
  1.1297 +        if (ptr == end)
  1.1298 +          return XML_TOK_TRAILING_CR;
  1.1299 +        if (BYTE_TYPE(enc, ptr) == BT_LF)
  1.1300 +          ptr += MINBPC(enc);
  1.1301 +        *nextTokPtr = ptr;
  1.1302 +        return XML_TOK_DATA_NEWLINE;
  1.1303 +      }
  1.1304 +      *nextTokPtr = ptr;
  1.1305 +      return XML_TOK_DATA_CHARS;
  1.1306 +    default:
  1.1307 +      ptr += MINBPC(enc);
  1.1308 +      break;
  1.1309 +    }
  1.1310 +  }
  1.1311 +  *nextTokPtr = ptr;
  1.1312 +  return XML_TOK_DATA_CHARS;
  1.1313 +}
  1.1314 +
  1.1315 +#ifdef XML_DTD
  1.1316 +
  1.1317 +static int PTRCALL
  1.1318 +PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
  1.1319 +                         const char *end, const char **nextTokPtr)
  1.1320 +{
  1.1321 +  int level = 0;
  1.1322 +  if (MINBPC(enc) > 1) {
  1.1323 +    size_t n = end - ptr;
  1.1324 +    if (n & (MINBPC(enc) - 1)) {
  1.1325 +      n &= ~(MINBPC(enc) - 1);
  1.1326 +      end = ptr + n;
  1.1327 +    }
  1.1328 +  }
  1.1329 +  while (ptr != end) {
  1.1330 +    switch (BYTE_TYPE(enc, ptr)) {
  1.1331 +    INVALID_CASES(ptr, nextTokPtr)
  1.1332 +    case BT_LT:
  1.1333 +      if ((ptr += MINBPC(enc)) == end)
  1.1334 +        return XML_TOK_PARTIAL;
  1.1335 +      if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
  1.1336 +        if ((ptr += MINBPC(enc)) == end)
  1.1337 +          return XML_TOK_PARTIAL;
  1.1338 +        if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
  1.1339 +          ++level;
  1.1340 +          ptr += MINBPC(enc);
  1.1341 +        }
  1.1342 +      }
  1.1343 +      break;
  1.1344 +    case BT_RSQB:
  1.1345 +      if ((ptr += MINBPC(enc)) == end)
  1.1346 +        return XML_TOK_PARTIAL;
  1.1347 +      if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
  1.1348 +        if ((ptr += MINBPC(enc)) == end)
  1.1349 +          return XML_TOK_PARTIAL;
  1.1350 +        if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  1.1351 +          ptr += MINBPC(enc);
  1.1352 +          if (level == 0) {
  1.1353 +            *nextTokPtr = ptr;
  1.1354 +            return XML_TOK_IGNORE_SECT;
  1.1355 +          }
  1.1356 +          --level;
  1.1357 +        }
  1.1358 +      }
  1.1359 +      break;
  1.1360 +    default:
  1.1361 +      ptr += MINBPC(enc);
  1.1362 +      break;
  1.1363 +    }
  1.1364 +  }
  1.1365 +  return XML_TOK_PARTIAL;
  1.1366 +}
  1.1367 +
  1.1368 +#endif /* XML_DTD */
  1.1369 +
  1.1370 +static int PTRCALL
  1.1371 +PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
  1.1372 +                   const char **badPtr)
  1.1373 +{
  1.1374 +  ptr += MINBPC(enc);
  1.1375 +  end -= MINBPC(enc);
  1.1376 +  for (; ptr != end; ptr += MINBPC(enc)) {
  1.1377 +    switch (BYTE_TYPE(enc, ptr)) {
  1.1378 +    case BT_DIGIT:
  1.1379 +    case BT_HEX:
  1.1380 +    case BT_MINUS:
  1.1381 +    case BT_APOS:
  1.1382 +    case BT_LPAR:
  1.1383 +    case BT_RPAR:
  1.1384 +    case BT_PLUS:
  1.1385 +    case BT_COMMA:
  1.1386 +    case BT_SOL:
  1.1387 +    case BT_EQUALS:
  1.1388 +    case BT_QUEST:
  1.1389 +    case BT_CR:
  1.1390 +    case BT_LF:
  1.1391 +    case BT_SEMI:
  1.1392 +    case BT_EXCL:
  1.1393 +    case BT_AST:
  1.1394 +    case BT_PERCNT:
  1.1395 +    case BT_NUM:
  1.1396 +#ifdef XML_NS
  1.1397 +    case BT_COLON:
  1.1398 +#endif
  1.1399 +      break;
  1.1400 +    case BT_S:
  1.1401 +      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
  1.1402 +        *badPtr = ptr;
  1.1403 +        return 0;
  1.1404 +      }
  1.1405 +      break;
  1.1406 +    case BT_NAME:
  1.1407 +    case BT_NMSTRT:
  1.1408 +      if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
  1.1409 +        break;
  1.1410 +    default:
  1.1411 +      switch (BYTE_TO_ASCII(enc, ptr)) {
  1.1412 +      case 0x24: /* $ */
  1.1413 +      case 0x40: /* @ */
  1.1414 +        break;
  1.1415 +      default:
  1.1416 +        *badPtr = ptr;
  1.1417 +        return 0;
  1.1418 +      }
  1.1419 +      break;
  1.1420 +    }
  1.1421 +  }
  1.1422 +  return 1;
  1.1423 +}
  1.1424 +
  1.1425 +/* This must only be called for a well-formed start-tag or empty
  1.1426 +   element tag.  Returns the number of attributes.  Pointers to the
  1.1427 +   first attsMax attributes are stored in atts.
  1.1428 +*/
/* Implementation notes:
   - The scan has no end pointer; it stops at the first BT_GT or BT_SOL
     seen outside an attribute value, which is why the tag must already
     be known to be well-formed.
   - The returned count may exceed attsMax; every write to atts is
     guarded by "nAtts < attsMax".
   - atts[i].normalized starts at 1 and is cleared when the value
     contains BT_AMP, a CR or LF, or a BT_S character that is at the
     very start of the value, is not an ASCII space, or is followed by
     another space or by the closing quote.
*/
  1.1429 +
  1.1430 +static int PTRCALL
  1.1431 +PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
  1.1432 +                int attsMax, ATTRIBUTE *atts)
  1.1433 +{
  /* The scan begins in inName: ptr is advanced past its first character
     and what follows is treated as a name until whitespace is seen
     (presumably the element name after '<' -- confirm with callers). */
  1.1434 +  enum { other, inName, inValue } state = inName;
  1.1435 +  int nAtts = 0;
  1.1436 +  int open = 0; /* defined when state == inValue;
  1.1437 +                   initialization just to shut up compilers */
  1.1438 +
  1.1439 +  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
  1.1440 +    switch (BYTE_TYPE(enc, ptr)) {
    /* START_NAME: a name character seen in state "other" begins a new
       attribute name. */
  1.1441 +#define START_NAME \
  1.1442 +      if (state == other) { \
  1.1443 +        if (nAtts < attsMax) { \
  1.1444 +          atts[nAtts].name = ptr; \
  1.1445 +          atts[nAtts].normalized = 1; \
  1.1446 +        } \
  1.1447 +        state = inName; \
  1.1448 +      }
    /* Multi-byte characters: skip the extra bytes here; the loop
       increment supplies the remaining MINBPC(enc). */
  1.1449 +#define LEAD_CASE(n) \
  1.1450 +    case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
  1.1451 +    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1.1452 +#undef LEAD_CASE
  1.1453 +    case BT_NONASCII:
  1.1454 +    case BT_NMSTRT:
  1.1455 +    case BT_HEX:
  1.1456 +      START_NAME
  1.1457 +      break;
  1.1458 +#undef START_NAME
    /* A quote outside a value opens one (the value starts after the
       quote); the same kind of quote inside a value closes it and
       completes the attribute.  The other kind of quote inside a value
       is ordinary content. */
  1.1459 +    case BT_QUOT:
  1.1460 +      if (state != inValue) {
  1.1461 +        if (nAtts < attsMax)
  1.1462 +          atts[nAtts].valuePtr = ptr + MINBPC(enc);
  1.1463 +        state = inValue;
  1.1464 +        open = BT_QUOT;
  1.1465 +      }
  1.1466 +      else if (open == BT_QUOT) {
  1.1467 +        state = other;
  1.1468 +        if (nAtts < attsMax)
  1.1469 +          atts[nAtts].valueEnd = ptr;
  1.1470 +        nAtts++;
  1.1471 +      }
  1.1472 +      break;
  1.1473 +    case BT_APOS:
  1.1474 +      if (state != inValue) {
  1.1475 +        if (nAtts < attsMax)
  1.1476 +          atts[nAtts].valuePtr = ptr + MINBPC(enc);
  1.1477 +        state = inValue;
  1.1478 +        open = BT_APOS;
  1.1479 +      }
  1.1480 +      else if (open == BT_APOS) {
  1.1481 +        state = other;
  1.1482 +        if (nAtts < attsMax)
  1.1483 +          atts[nAtts].valueEnd = ptr;
  1.1484 +        nAtts++;
  1.1485 +      }
  1.1486 +      break;
  1.1487 +    case BT_AMP:
  1.1488 +      if (nAtts < attsMax)
  1.1489 +        atts[nAtts].normalized = 0;
  1.1490 +      break;
  1.1491 +    case BT_S:
  1.1492 +      if (state == inName)
  1.1493 +        state = other;
      /* Inside a value: clear "normalized" for a leading space, a
         non-space BT_S character, a doubled space, or a space directly
         before the closing quote. */
  1.1494 +      else if (state == inValue
  1.1495 +               && nAtts < attsMax
  1.1496 +               && atts[nAtts].normalized
  1.1497 +               && (ptr == atts[nAtts].valuePtr
  1.1498 +                   || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
  1.1499 +                   || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
  1.1500 +                   || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
  1.1501 +        atts[nAtts].normalized = 0;
  1.1502 +      break;
  1.1503 +    case BT_CR: case BT_LF:
  1.1504 +      /* This case ensures that the first attribute name is counted
  1.1505 +         Apart from that we could just change state on the quote. */
  1.1506 +      if (state == inName)
  1.1507 +        state = other;
  1.1508 +      else if (state == inValue && nAtts < attsMax)
  1.1509 +        atts[nAtts].normalized = 0;
  1.1510 +      break;
    /* End of the tag, unless the character is part of a value. */
  1.1511 +    case BT_GT:
  1.1512 +    case BT_SOL:
  1.1513 +      if (state != inValue)
  1.1514 +        return nAtts;
  1.1515 +      break;
  1.1516 +    default:
  1.1517 +      break;
  1.1518 +    }
  1.1519 +  }
  1.1520 +  /* not reached */
  1.1521 +}
  1.1522 +
  1.1523 +static int PTRFASTCALL
  1.1524 +PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
  1.1525 +{
  1.1526 +  int result = 0;
  1.1527 +  /* skip &# */
  1.1528 +  ptr += 2*MINBPC(enc);
  1.1529 +  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
  1.1530 +    for (ptr += MINBPC(enc);
  1.1531 +         !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
  1.1532 +         ptr += MINBPC(enc)) {
  1.1533 +      int c = BYTE_TO_ASCII(enc, ptr);
  1.1534 +      switch (c) {
  1.1535 +      case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
  1.1536 +      case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
  1.1537 +        result <<= 4;
  1.1538 +        result |= (c - ASCII_0);
  1.1539 +        break;
  1.1540 +      case ASCII_A: case ASCII_B: case ASCII_C:
  1.1541 +      case ASCII_D: case ASCII_E: case ASCII_F:
  1.1542 +        result <<= 4;
  1.1543 +        result += 10 + (c - ASCII_A);
  1.1544 +        break;
  1.1545 +      case ASCII_a: case ASCII_b: case ASCII_c:
  1.1546 +      case ASCII_d: case ASCII_e: case ASCII_f:
  1.1547 +        result <<= 4;
  1.1548 +        result += 10 + (c - ASCII_a);
  1.1549 +        break;
  1.1550 +      }
  1.1551 +      if (result >= 0x110000)
  1.1552 +        return -1;
  1.1553 +    }
  1.1554 +  }
  1.1555 +  else {
  1.1556 +    for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
  1.1557 +      int c = BYTE_TO_ASCII(enc, ptr);
  1.1558 +      result *= 10;
  1.1559 +      result += (c - ASCII_0);
  1.1560 +      if (result >= 0x110000)
  1.1561 +        return -1;
  1.1562 +    }
  1.1563 +  }
  1.1564 +  return checkCharRefNumber(result);
  1.1565 +}
  1.1566 +
  1.1567 +static int PTRCALL
  1.1568 +PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
  1.1569 +                             const char *end)
  1.1570 +{
  1.1571 +  switch ((end - ptr)/MINBPC(enc)) {
  1.1572 +  case 2:
  1.1573 +    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
  1.1574 +      switch (BYTE_TO_ASCII(enc, ptr)) {
  1.1575 +      case ASCII_l:
  1.1576 +        return ASCII_LT;
  1.1577 +      case ASCII_g:
  1.1578 +        return ASCII_GT;
  1.1579 +      }
  1.1580 +    }
  1.1581 +    break;
  1.1582 +  case 3:
  1.1583 +    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
  1.1584 +      ptr += MINBPC(enc);
  1.1585 +      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
  1.1586 +        ptr += MINBPC(enc);
  1.1587 +        if (CHAR_MATCHES(enc, ptr, ASCII_p))
  1.1588 +          return ASCII_AMP;
  1.1589 +      }
  1.1590 +    }
  1.1591 +    break;
  1.1592 +  case 4:
  1.1593 +    switch (BYTE_TO_ASCII(enc, ptr)) {
  1.1594 +    case ASCII_q:
  1.1595 +      ptr += MINBPC(enc);
  1.1596 +      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
  1.1597 +        ptr += MINBPC(enc);
  1.1598 +        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
  1.1599 +          ptr += MINBPC(enc);
  1.1600 +          if (CHAR_MATCHES(enc, ptr, ASCII_t))
  1.1601 +            return ASCII_QUOT;
  1.1602 +        }
  1.1603 +      }
  1.1604 +      break;
  1.1605 +    case ASCII_a:
  1.1606 +      ptr += MINBPC(enc);
  1.1607 +      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
  1.1608 +        ptr += MINBPC(enc);
  1.1609 +        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
  1.1610 +          ptr += MINBPC(enc);
  1.1611 +          if (CHAR_MATCHES(enc, ptr, ASCII_s))
  1.1612 +            return ASCII_APOS;
  1.1613 +        }
  1.1614 +      }
  1.1615 +      break;
  1.1616 +    }
  1.1617 +  }
  1.1618 +  return 0;
  1.1619 +}
  1.1620 +
/* Compare the names that start at ptr1 and ptr2.  Returns 1 when both
   names are identical byte for byte and end at the same point, 0
   otherwise.  There are no end pointers: the comparison stops at the
   first character of ptr1 that cannot be part of a name, so both inputs
   must be followed by such a character.
*/
  1.1621 +static int PTRCALL
  1.1622 +PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
  1.1623 +{
  1.1624 +  for (;;) {
  1.1625 +    switch (BYTE_TYPE(enc, ptr1)) {
    /* The lead cases are listed 4, 3, 2 and deliberately have no break:
       each compares one byte and falls into the next, and the statement
       after the last one compares the final byte.  A lead-n byte thus
       compares exactly n bytes. */
  1.1626 +#define LEAD_CASE(n) \
  1.1627 +    case BT_LEAD ## n: \
  1.1628 +      if (*ptr1++ != *ptr2++) \
  1.1629 +        return 0;
  1.1630 +    LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
  1.1631 +#undef LEAD_CASE
  1.1632 +      /* fall through */
  1.1633 +      if (*ptr1++ != *ptr2++)
  1.1634 +        return 0;
  1.1635 +      break;
    /* Name characters that occupy one code unit: compare MINBPC(enc)
       bytes (the nested tests cover unit sizes up to 4). */
  1.1636 +    case BT_NONASCII:
  1.1637 +    case BT_NMSTRT:
  1.1638 +#ifdef XML_NS
  1.1639 +    case BT_COLON:
  1.1640 +#endif
  1.1641 +    case BT_HEX:
  1.1642 +    case BT_DIGIT:
  1.1643 +    case BT_NAME:
  1.1644 +    case BT_MINUS:
  1.1645 +      if (*ptr2++ != *ptr1++)
  1.1646 +        return 0;
  1.1647 +      if (MINBPC(enc) > 1) {
  1.1648 +        if (*ptr2++ != *ptr1++)
  1.1649 +          return 0;
  1.1650 +        if (MINBPC(enc) > 2) {
  1.1651 +          if (*ptr2++ != *ptr1++)
  1.1652 +            return 0;
  1.1653 +          if (MINBPC(enc) > 3) {
  1.1654 +            if (*ptr2++ != *ptr1++)
  1.1655 +              return 0;
  1.1656 +          }
  1.1657 +        }
  1.1658 +      }
  1.1659 +      break;
    /* The first name has ended.  The names match only if the second has
       ended too: either it shows the identical byte (single-byte units
       only), or its current character is not a name character. */
  1.1660 +    default:
  1.1661 +      if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
  1.1662 +        return 1;
  1.1663 +      switch (BYTE_TYPE(enc, ptr2)) {
  1.1664 +      case BT_LEAD2:
  1.1665 +      case BT_LEAD3:
  1.1666 +      case BT_LEAD4:
  1.1667 +      case BT_NONASCII:
  1.1668 +      case BT_NMSTRT:
  1.1669 +#ifdef XML_NS
  1.1670 +      case BT_COLON:
  1.1671 +#endif
  1.1672 +      case BT_HEX:
  1.1673 +      case BT_DIGIT:
  1.1674 +      case BT_NAME:
  1.1675 +      case BT_MINUS:
  1.1676 +        return 0;
  1.1677 +      default:
  1.1678 +        return 1;
  1.1679 +      }
  1.1680 +    }
  1.1681 +  }
  1.1682 +  /* not reached */
  1.1683 +}
  1.1684 +
  1.1685 +static int PTRCALL
  1.1686 +PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
  1.1687 +                         const char *end1, const char *ptr2)
  1.1688 +{
  1.1689 +  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
  1.1690 +    if (ptr1 == end1)
  1.1691 +      return 0;
  1.1692 +    if (!CHAR_MATCHES(enc, ptr1, *ptr2))
  1.1693 +      return 0;
  1.1694 +  }
  1.1695 +  return ptr1 == end1;
  1.1696 +}
  1.1697 +
  1.1698 +static int PTRFASTCALL
  1.1699 +PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
  1.1700 +{
  1.1701 +  const char *start = ptr;
  1.1702 +  for (;;) {
  1.1703 +    switch (BYTE_TYPE(enc, ptr)) {
  1.1704 +#define LEAD_CASE(n) \
  1.1705 +    case BT_LEAD ## n: ptr += n; break;
  1.1706 +    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1.1707 +#undef LEAD_CASE
  1.1708 +    case BT_NONASCII:
  1.1709 +    case BT_NMSTRT:
  1.1710 +#ifdef XML_NS
  1.1711 +    case BT_COLON:
  1.1712 +#endif
  1.1713 +    case BT_HEX:
  1.1714 +    case BT_DIGIT:
  1.1715 +    case BT_NAME:
  1.1716 +    case BT_MINUS:
  1.1717 +      ptr += MINBPC(enc);
  1.1718 +      break;
  1.1719 +    default:
  1.1720 +      return (int)(ptr - start);
  1.1721 +    }
  1.1722 +  }
  1.1723 +}
  1.1724 +
  1.1725 +static const char * PTRFASTCALL
  1.1726 +PREFIX(skipS)(const ENCODING *enc, const char *ptr)
  1.1727 +{
  1.1728 +  for (;;) {
  1.1729 +    switch (BYTE_TYPE(enc, ptr)) {
  1.1730 +    case BT_LF:
  1.1731 +    case BT_CR:
  1.1732 +    case BT_S:
  1.1733 +      ptr += MINBPC(enc);
  1.1734 +      break;
  1.1735 +    default:
  1.1736 +      return ptr;
  1.1737 +    }
  1.1738 +  }
  1.1739 +}
  1.1740 +
  1.1741 +static void PTRCALL
  1.1742 +PREFIX(updatePosition)(const ENCODING *enc,
  1.1743 +                       const char *ptr,
  1.1744 +                       const char *end,
  1.1745 +                       POSITION *pos)
  1.1746 +{
  1.1747 +  while (ptr != end) {
  1.1748 +    switch (BYTE_TYPE(enc, ptr)) {
  1.1749 +#define LEAD_CASE(n) \
  1.1750 +    case BT_LEAD ## n: \
  1.1751 +      ptr += n; \
  1.1752 +      break;
  1.1753 +    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1.1754 +#undef LEAD_CASE
  1.1755 +    case BT_LF:
  1.1756 +      pos->columnNumber = (XML_Size)-1;
  1.1757 +      pos->lineNumber++;
  1.1758 +      ptr += MINBPC(enc);
  1.1759 +      break;
  1.1760 +    case BT_CR:
  1.1761 +      pos->lineNumber++;
  1.1762 +      ptr += MINBPC(enc);
  1.1763 +      if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
  1.1764 +        ptr += MINBPC(enc);
  1.1765 +      pos->columnNumber = (XML_Size)-1;
  1.1766 +      break;
  1.1767 +    default:
  1.1768 +      ptr += MINBPC(enc);
  1.1769 +      break;
  1.1770 +    }
  1.1771 +    pos->columnNumber++;
  1.1772 +  }
  1.1773 +}
  1.1774 +
/* Remove the helper macros used by this file.  INVALID_CASES and the
   CHECK_NAME_* macros are defined at the top of the file.
   NOTE(review): presumably this lets the file be included again with
   different definitions of PREFIX and the character macros -- confirm
   against the including file.  The definitions of DO_LEAD_CASE and
   MULTIBYTE_CASES are not visible in this part of the file. */
  1.1775 +#undef DO_LEAD_CASE
  1.1776 +#undef MULTIBYTE_CASES
  1.1777 +#undef INVALID_CASES
  1.1778 +#undef CHECK_NAME_CASE
  1.1779 +#undef CHECK_NAME_CASES
  1.1780 +#undef CHECK_NMSTRT_CASE
  1.1781 +#undef CHECK_NMSTRT_CASES
  1.1782 +

mercurial