1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/parser/expat/lib/xmltok_impl.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1779 @@ 1.4 +/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 1.5 + See the file COPYING for copying permission. 1.6 +*/ 1.7 + 1.8 +#ifndef IS_INVALID_CHAR 1.9 +#define IS_INVALID_CHAR(enc, ptr, n) (0) 1.10 +#endif 1.11 + 1.12 +#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ 1.13 + case BT_LEAD ## n: \ 1.14 + if (end - ptr < n) \ 1.15 + return XML_TOK_PARTIAL_CHAR; \ 1.16 + if (IS_INVALID_CHAR(enc, ptr, n)) { \ 1.17 + *(nextTokPtr) = (ptr); \ 1.18 + return XML_TOK_INVALID; \ 1.19 + } \ 1.20 + ptr += n; \ 1.21 + break; 1.22 + 1.23 +#define INVALID_CASES(ptr, nextTokPtr) \ 1.24 + INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ 1.25 + INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ 1.26 + INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ 1.27 + case BT_NONXML: \ 1.28 + case BT_MALFORM: \ 1.29 + case BT_TRAIL: \ 1.30 + *(nextTokPtr) = (ptr); \ 1.31 + return XML_TOK_INVALID; 1.32 + 1.33 +#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ 1.34 + case BT_LEAD ## n: \ 1.35 + if (end - ptr < n) \ 1.36 + return XML_TOK_PARTIAL_CHAR; \ 1.37 + if (!IS_NAME_CHAR(enc, ptr, n)) { \ 1.38 + *nextTokPtr = ptr; \ 1.39 + return XML_TOK_INVALID; \ 1.40 + } \ 1.41 + ptr += n; \ 1.42 + break; 1.43 + 1.44 +#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ 1.45 + case BT_NONASCII: \ 1.46 + if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ 1.47 + *nextTokPtr = ptr; \ 1.48 + return XML_TOK_INVALID; \ 1.49 + } \ 1.50 + case BT_NMSTRT: \ 1.51 + case BT_HEX: \ 1.52 + case BT_DIGIT: \ 1.53 + case BT_NAME: \ 1.54 + case BT_MINUS: \ 1.55 + ptr += MINBPC(enc); \ 1.56 + break; \ 1.57 + CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ 1.58 + CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ 1.59 + CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) 1.60 + 1.61 +#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ 1.62 + case BT_LEAD ## n: \ 1.63 + if (end - ptr < n) \ 1.64 + return XML_TOK_PARTIAL_CHAR; \ 1.65 + if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ 1.66 + *nextTokPtr = ptr; \ 1.67 + return XML_TOK_INVALID; \ 1.68 + } \ 1.69 + ptr += n; \ 1.70 + break; 1.71 + 1.72 +#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ 1.73 + case BT_NONASCII: \ 1.74 + if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ 1.75 + *nextTokPtr = ptr; \ 1.76 + return XML_TOK_INVALID; \ 1.77 + } \ 1.78 + case BT_NMSTRT: \ 1.79 + case BT_HEX: \ 1.80 + ptr += MINBPC(enc); \ 1.81 + break; \ 1.82 + CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ 1.83 + CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ 1.84 + CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) 1.85 + 1.86 +#ifndef PREFIX 1.87 +#define PREFIX(ident) ident 1.88 +#endif 1.89 + 1.90 +/* ptr points to character following "<!-" */ 1.91 + 1.92 +static int PTRCALL 1.93 +PREFIX(scanComment)(const ENCODING *enc, const char *ptr, 1.94 + const char *end, const char **nextTokPtr) 1.95 +{ 1.96 + if (ptr != end) { 1.97 + if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 1.98 + *nextTokPtr = ptr; 1.99 + return XML_TOK_INVALID; 1.100 + } 1.101 + ptr += MINBPC(enc); 1.102 + while (ptr != end) { 1.103 + switch (BYTE_TYPE(enc, ptr)) { 1.104 + INVALID_CASES(ptr, nextTokPtr) 1.105 + case BT_MINUS: 1.106 + if ((ptr += MINBPC(enc)) == end) 1.107 + return XML_TOK_PARTIAL; 1.108 + if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 1.109 + if ((ptr += MINBPC(enc)) == end) 1.110 + return XML_TOK_PARTIAL; 1.111 + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1.112 + *nextTokPtr = ptr; 1.113 + return XML_TOK_INVALID; 1.114 + } 1.115 + *nextTokPtr = ptr + MINBPC(enc); 1.116 + return XML_TOK_COMMENT; 1.117 + } 1.118 + break; 1.119 + default: 1.120 + ptr += MINBPC(enc); 1.121 + break; 1.122 + } 1.123 + } 1.124 + } 1.125 + return XML_TOK_PARTIAL; 1.126 +} 1.127 + 1.128 +/* ptr points to character following "<!" */ 1.129 + 1.130 +static int PTRCALL 1.131 +PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, 1.132 + const char *end, const char **nextTokPtr) 1.133 +{ 1.134 + if (ptr == end) 1.135 + return XML_TOK_PARTIAL; 1.136 + switch (BYTE_TYPE(enc, ptr)) { 1.137 + case BT_MINUS: 1.138 + return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.139 + case BT_LSQB: 1.140 + *nextTokPtr = ptr + MINBPC(enc); 1.141 + return XML_TOK_COND_SECT_OPEN; 1.142 + case BT_NMSTRT: 1.143 + case BT_HEX: 1.144 + ptr += MINBPC(enc); 1.145 + break; 1.146 + default: 1.147 + *nextTokPtr = ptr; 1.148 + return XML_TOK_INVALID; 1.149 + } 1.150 + while (ptr != end) { 1.151 + switch (BYTE_TYPE(enc, ptr)) { 1.152 + case BT_PERCNT: 1.153 + if (ptr + MINBPC(enc) == end) 1.154 + return XML_TOK_PARTIAL; 1.155 + /* don't allow <!ENTITY% foo "whatever"> */ 1.156 + switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { 1.157 + case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: 1.158 + *nextTokPtr = ptr; 1.159 + return XML_TOK_INVALID; 1.160 + } 1.161 + /* fall through */ 1.162 + case BT_S: case BT_CR: case BT_LF: 1.163 + *nextTokPtr = ptr; 1.164 + return XML_TOK_DECL_OPEN; 1.165 + case BT_NMSTRT: 1.166 + case BT_HEX: 1.167 + ptr += MINBPC(enc); 1.168 + break; 1.169 + default: 1.170 + *nextTokPtr = ptr; 1.171 + return XML_TOK_INVALID; 1.172 + } 1.173 + } 1.174 + return XML_TOK_PARTIAL; 1.175 +} 1.176 + 1.177 +static int PTRCALL 1.178 +PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, 1.179 + const char *end, int *tokPtr) 1.180 +{ 1.181 + int upper = 0; 1.182 + *tokPtr = XML_TOK_PI; 1.183 + if (end - ptr != MINBPC(enc)*3) 1.184 + return 1; 1.185 + switch (BYTE_TO_ASCII(enc, ptr)) { 1.186 + case ASCII_x: 1.187 + break; 1.188 + case ASCII_X: 1.189 + upper = 1; 1.190 + break; 1.191 + default: 1.192 + return 1; 1.193 + } 1.194 + ptr += MINBPC(enc); 1.195 + switch (BYTE_TO_ASCII(enc, ptr)) { 1.196 + case ASCII_m: 1.197 + break; 1.198 + case ASCII_M: 1.199 + upper = 1; 1.200 + break; 1.201 + default: 1.202 + return 1; 1.203 + } 1.204 + ptr += MINBPC(enc); 1.205 + switch (BYTE_TO_ASCII(enc, ptr)) { 1.206 + case ASCII_l: 1.207 + break; 1.208 + case ASCII_L: 1.209 + upper = 1; 1.210 + break; 1.211 + default: 1.212 + return 1; 1.213 + } 1.214 + if (upper) 1.215 + return 0; 1.216 + *tokPtr = XML_TOK_XML_DECL; 1.217 + return 1; 1.218 +} 1.219 + 1.220 +/* ptr points to character following "<?" */ 1.221 + 1.222 +static int PTRCALL 1.223 +PREFIX(scanPi)(const ENCODING *enc, const char *ptr, 1.224 + const char *end, const char **nextTokPtr) 1.225 +{ 1.226 + int tok; 1.227 + const char *target = ptr; 1.228 + if (ptr == end) 1.229 + return XML_TOK_PARTIAL; 1.230 + switch (BYTE_TYPE(enc, ptr)) { 1.231 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1.232 + default: 1.233 + *nextTokPtr = ptr; 1.234 + return XML_TOK_INVALID; 1.235 + } 1.236 + while (ptr != end) { 1.237 + switch (BYTE_TYPE(enc, ptr)) { 1.238 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1.239 + case BT_S: case BT_CR: case BT_LF: 1.240 + if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 1.241 + *nextTokPtr = ptr; 1.242 + return XML_TOK_INVALID; 1.243 + } 1.244 + ptr += MINBPC(enc); 1.245 + while (ptr != end) { 1.246 + switch (BYTE_TYPE(enc, ptr)) { 1.247 + INVALID_CASES(ptr, nextTokPtr) 1.248 + case BT_QUEST: 1.249 + ptr += MINBPC(enc); 1.250 + if (ptr == end) 1.251 + return XML_TOK_PARTIAL; 1.252 + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1.253 + *nextTokPtr = ptr + MINBPC(enc); 1.254 + return tok; 1.255 + } 1.256 + break; 1.257 + default: 1.258 + ptr += MINBPC(enc); 1.259 + break; 1.260 + } 1.261 + } 1.262 + return XML_TOK_PARTIAL; 1.263 + case BT_QUEST: 1.264 + if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 1.265 + *nextTokPtr = ptr; 1.266 + return XML_TOK_INVALID; 1.267 + } 1.268 + ptr += MINBPC(enc); 1.269 + if (ptr == end) 1.270 + return XML_TOK_PARTIAL; 1.271 + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1.272 + *nextTokPtr = ptr + MINBPC(enc); 1.273 + return tok; 1.274 + } 1.275 + /* fall through */ 1.276 + default: 1.277 + *nextTokPtr = ptr; 1.278 + return XML_TOK_INVALID; 1.279 + } 1.280 + } 1.281 + return XML_TOK_PARTIAL; 1.282 +} 1.283 + 1.284 +static int PTRCALL 1.285 +PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, 1.286 + const char *end, const char **nextTokPtr) 1.287 +{ 1.288 + static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, 1.289 + ASCII_T, ASCII_A, ASCII_LSQB }; 1.290 + int i; 1.291 + /* CDATA[ */ 1.292 + if (end - ptr < 6 * MINBPC(enc)) 1.293 + return XML_TOK_PARTIAL; 1.294 + for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { 1.295 + if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { 1.296 + *nextTokPtr = ptr; 1.297 + return XML_TOK_INVALID; 1.298 + } 1.299 + } 1.300 + *nextTokPtr = ptr; 1.301 + return XML_TOK_CDATA_SECT_OPEN; 1.302 +} 1.303 + 1.304 +static int PTRCALL 1.305 +PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, 1.306 + const char *end, const char **nextTokPtr) 1.307 +{ 1.308 + if (ptr == end) 1.309 + return XML_TOK_NONE; 1.310 + if (MINBPC(enc) > 1) { 1.311 + size_t n = end - ptr; 1.312 + if (n & (MINBPC(enc) - 1)) { 1.313 + n &= ~(MINBPC(enc) - 1); 1.314 + if (n == 0) 1.315 + return XML_TOK_PARTIAL; 1.316 + end = ptr + n; 1.317 + } 1.318 + } 1.319 + switch (BYTE_TYPE(enc, ptr)) { 1.320 + case BT_RSQB: 1.321 + ptr += MINBPC(enc); 1.322 + if (ptr == end) 1.323 + return XML_TOK_PARTIAL; 1.324 + if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 1.325 + break; 1.326 + ptr += MINBPC(enc); 1.327 + if (ptr == end) 1.328 + return XML_TOK_PARTIAL; 1.329 + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1.330 + ptr -= MINBPC(enc); 1.331 + break; 1.332 + } 1.333 + *nextTokPtr = ptr + MINBPC(enc); 1.334 + return XML_TOK_CDATA_SECT_CLOSE; 1.335 + case BT_CR: 1.336 + ptr += MINBPC(enc); 1.337 + if (ptr == end) 1.338 + return XML_TOK_PARTIAL; 1.339 + if (BYTE_TYPE(enc, ptr) == BT_LF) 1.340 + ptr += MINBPC(enc); 1.341 + *nextTokPtr = ptr; 1.342 + return XML_TOK_DATA_NEWLINE; 1.343 + case BT_LF: 1.344 + *nextTokPtr = ptr + MINBPC(enc); 1.345 + return XML_TOK_DATA_NEWLINE; 1.346 + INVALID_CASES(ptr, nextTokPtr) 1.347 + default: 1.348 + ptr += MINBPC(enc); 1.349 + break; 1.350 + } 1.351 + while (ptr != end) { 1.352 + switch (BYTE_TYPE(enc, ptr)) { 1.353 +#define LEAD_CASE(n) \ 1.354 + case BT_LEAD ## n: \ 1.355 + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 1.356 + *nextTokPtr = ptr; \ 1.357 + return XML_TOK_DATA_CHARS; \ 1.358 + } \ 1.359 + ptr += n; \ 1.360 + break; 1.361 + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1.362 +#undef LEAD_CASE 1.363 + case BT_NONXML: 1.364 + case BT_MALFORM: 1.365 + case BT_TRAIL: 1.366 + case BT_CR: 1.367 + case BT_LF: 1.368 + case BT_RSQB: 1.369 + *nextTokPtr = ptr; 1.370 + return XML_TOK_DATA_CHARS; 1.371 + default: 1.372 + ptr += MINBPC(enc); 1.373 + break; 1.374 + } 1.375 + } 1.376 + *nextTokPtr = ptr; 1.377 + return XML_TOK_DATA_CHARS; 1.378 +} 1.379 + 1.380 +/* ptr points to character following "</" */ 1.381 + 1.382 +static int PTRCALL 1.383 +PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, 1.384 + const char *end, const char **nextTokPtr) 1.385 +{ 1.386 + if (ptr == end) 1.387 + return XML_TOK_PARTIAL; 1.388 + switch (BYTE_TYPE(enc, ptr)) { 1.389 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1.390 + default: 1.391 + *nextTokPtr = ptr; 1.392 + return XML_TOK_INVALID; 1.393 + } 1.394 + while (ptr != end) { 1.395 + switch (BYTE_TYPE(enc, ptr)) { 1.396 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1.397 + case BT_S: case BT_CR: case BT_LF: 1.398 + for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 1.399 + switch (BYTE_TYPE(enc, ptr)) { 1.400 + case BT_S: case BT_CR: case BT_LF: 1.401 + break; 1.402 + case BT_GT: 1.403 + *nextTokPtr = ptr + MINBPC(enc); 1.404 + return XML_TOK_END_TAG; 1.405 + default: 1.406 + *nextTokPtr = ptr; 1.407 + return XML_TOK_INVALID; 1.408 + } 1.409 + } 1.410 + return XML_TOK_PARTIAL; 1.411 +#ifdef XML_NS 1.412 + case BT_COLON: 1.413 + /* no need to check qname syntax here, 1.414 + since end-tag must match exactly */ 1.415 + ptr += MINBPC(enc); 1.416 + break; 1.417 +#endif 1.418 + case BT_GT: 1.419 + *nextTokPtr = ptr + MINBPC(enc); 1.420 + return XML_TOK_END_TAG; 1.421 + default: 1.422 + *nextTokPtr = ptr; 1.423 + return XML_TOK_INVALID; 1.424 + } 1.425 + } 1.426 + return XML_TOK_PARTIAL; 1.427 +} 1.428 + 1.429 +/* ptr points to character following "&#X" */ 1.430 + 1.431 +static int PTRCALL 1.432 +PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, 1.433 + const char *end, const char **nextTokPtr) 1.434 +{ 1.435 + if (ptr != end) { 1.436 + switch (BYTE_TYPE(enc, ptr)) { 1.437 + case BT_DIGIT: 1.438 + case BT_HEX: 1.439 + break; 1.440 + default: 1.441 + *nextTokPtr = ptr; 1.442 + return XML_TOK_INVALID; 1.443 + } 1.444 + for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 1.445 + switch (BYTE_TYPE(enc, ptr)) { 1.446 + case BT_DIGIT: 1.447 + case BT_HEX: 1.448 + break; 1.449 + case BT_SEMI: 1.450 + *nextTokPtr = ptr + MINBPC(enc); 1.451 + return XML_TOK_CHAR_REF; 1.452 + default: 1.453 + *nextTokPtr = ptr; 1.454 + return XML_TOK_INVALID; 1.455 + } 1.456 + } 1.457 + } 1.458 + return XML_TOK_PARTIAL; 1.459 +} 1.460 + 1.461 +/* ptr points to character following "&#" */ 1.462 + 1.463 +static int PTRCALL 1.464 +PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, 1.465 + const char *end, const char **nextTokPtr) 1.466 +{ 1.467 + if (ptr != end) { 1.468 + if (CHAR_MATCHES(enc, ptr, ASCII_x)) 1.469 + return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.470 + switch (BYTE_TYPE(enc, ptr)) { 1.471 + case BT_DIGIT: 1.472 + break; 1.473 + default: 1.474 + *nextTokPtr = ptr; 1.475 + return XML_TOK_INVALID; 1.476 + } 1.477 + for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 1.478 + switch (BYTE_TYPE(enc, ptr)) { 1.479 + case BT_DIGIT: 1.480 + break; 1.481 + case BT_SEMI: 1.482 + *nextTokPtr = ptr + MINBPC(enc); 1.483 + return XML_TOK_CHAR_REF; 1.484 + default: 1.485 + *nextTokPtr = ptr; 1.486 + return XML_TOK_INVALID; 1.487 + } 1.488 + } 1.489 + } 1.490 + return XML_TOK_PARTIAL; 1.491 +} 1.492 + 1.493 +/* ptr points to character following "&" */ 1.494 + 1.495 +static int PTRCALL 1.496 +PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, 1.497 + const char **nextTokPtr) 1.498 +{ 1.499 + if (ptr == end) 1.500 + return XML_TOK_PARTIAL; 1.501 + switch (BYTE_TYPE(enc, ptr)) { 1.502 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1.503 + case BT_NUM: 1.504 + return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.505 + default: 1.506 + *nextTokPtr = ptr; 1.507 + return XML_TOK_INVALID; 1.508 + } 1.509 + while (ptr != end) { 1.510 + switch (BYTE_TYPE(enc, ptr)) { 1.511 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1.512 + case BT_SEMI: 1.513 + *nextTokPtr = ptr + MINBPC(enc); 1.514 + return XML_TOK_ENTITY_REF; 1.515 + default: 1.516 + *nextTokPtr = ptr; 1.517 + return XML_TOK_INVALID; 1.518 + } 1.519 + } 1.520 + return XML_TOK_PARTIAL; 1.521 +} 1.522 + 1.523 +/* ptr points to character following first character of attribute name */ 1.524 + 1.525 +static int PTRCALL 1.526 +PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, 1.527 + const char **nextTokPtr) 1.528 +{ 1.529 +#ifdef XML_NS 1.530 + int hadColon = 0; 1.531 +#endif 1.532 + while (ptr != end) { 1.533 + switch (BYTE_TYPE(enc, ptr)) { 1.534 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1.535 +#ifdef XML_NS 1.536 + case BT_COLON: 1.537 + if (hadColon) { 1.538 + *nextTokPtr = ptr; 1.539 + return XML_TOK_INVALID; 1.540 + } 1.541 + hadColon = 1; 1.542 + ptr += MINBPC(enc); 1.543 + if (ptr == end) 1.544 + return XML_TOK_PARTIAL; 1.545 + switch (BYTE_TYPE(enc, ptr)) { 1.546 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1.547 + default: 1.548 + *nextTokPtr = ptr; 1.549 + return XML_TOK_INVALID; 1.550 + } 1.551 + break; 1.552 +#endif 1.553 + case BT_S: case BT_CR: case BT_LF: 1.554 + for (;;) { 1.555 + int t; 1.556 + 1.557 + ptr += MINBPC(enc); 1.558 + if (ptr == end) 1.559 + return XML_TOK_PARTIAL; 1.560 + t = BYTE_TYPE(enc, ptr); 1.561 + if (t == BT_EQUALS) 1.562 + break; 1.563 + switch (t) { 1.564 + case BT_S: 1.565 + case BT_LF: 1.566 + case BT_CR: 1.567 + break; 1.568 + default: 1.569 + *nextTokPtr = ptr; 1.570 + return XML_TOK_INVALID; 1.571 + } 1.572 + } 1.573 + /* fall through */ 1.574 + case BT_EQUALS: 1.575 + { 1.576 + int open; 1.577 +#ifdef XML_NS 1.578 + hadColon = 0; 1.579 +#endif 1.580 + for (;;) { 1.581 + ptr += MINBPC(enc); 1.582 + if (ptr == end) 1.583 + return XML_TOK_PARTIAL; 1.584 + open = BYTE_TYPE(enc, ptr); 1.585 + if (open == BT_QUOT || open == BT_APOS) 1.586 + break; 1.587 + switch (open) { 1.588 + case BT_S: 1.589 + case BT_LF: 1.590 + case BT_CR: 1.591 + break; 1.592 + default: 1.593 + *nextTokPtr = ptr; 1.594 + return XML_TOK_INVALID; 1.595 + } 1.596 + } 1.597 + ptr += MINBPC(enc); 1.598 + /* in attribute value */ 1.599 + for (;;) { 1.600 + int t; 1.601 + if (ptr == end) 1.602 + return XML_TOK_PARTIAL; 1.603 + t = BYTE_TYPE(enc, ptr); 1.604 + if (t == open) 1.605 + break; 1.606 + switch (t) { 1.607 + INVALID_CASES(ptr, nextTokPtr) 1.608 + case BT_AMP: 1.609 + { 1.610 + int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); 1.611 + if (tok <= 0) { 1.612 + if (tok == XML_TOK_INVALID) 1.613 + *nextTokPtr = ptr; 1.614 + return tok; 1.615 + } 1.616 + break; 1.617 + } 1.618 + case BT_LT: 1.619 + *nextTokPtr = ptr; 1.620 + return XML_TOK_INVALID; 1.621 + default: 1.622 + ptr += MINBPC(enc); 1.623 + break; 1.624 + } 1.625 + } 1.626 + ptr += MINBPC(enc); 1.627 + if (ptr == end) 1.628 + return XML_TOK_PARTIAL; 1.629 + switch (BYTE_TYPE(enc, ptr)) { 1.630 + case BT_S: 1.631 + case BT_CR: 1.632 + case BT_LF: 1.633 + break; 1.634 + case BT_SOL: 1.635 + goto sol; 1.636 + case BT_GT: 1.637 + goto gt; 1.638 + default: 1.639 + *nextTokPtr = ptr; 1.640 + return XML_TOK_INVALID; 1.641 + } 1.642 + /* ptr points to closing quote */ 1.643 + for (;;) { 1.644 + ptr += MINBPC(enc); 1.645 + if (ptr == end) 1.646 + return XML_TOK_PARTIAL; 1.647 + switch (BYTE_TYPE(enc, ptr)) { 1.648 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1.649 + case BT_S: case BT_CR: case BT_LF: 1.650 + continue; 1.651 + case BT_GT: 1.652 + gt: 1.653 + *nextTokPtr = ptr + MINBPC(enc); 1.654 + return XML_TOK_START_TAG_WITH_ATTS; 1.655 + case BT_SOL: 1.656 + sol: 1.657 + ptr += MINBPC(enc); 1.658 + if (ptr == end) 1.659 + return XML_TOK_PARTIAL; 1.660 + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1.661 + *nextTokPtr = ptr; 1.662 + return XML_TOK_INVALID; 1.663 + } 1.664 + *nextTokPtr = ptr + MINBPC(enc); 1.665 + return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; 1.666 + default: 1.667 + *nextTokPtr = ptr; 1.668 + return XML_TOK_INVALID; 1.669 + } 1.670 + break; 1.671 + } 1.672 + break; 1.673 + } 1.674 + default: 1.675 + *nextTokPtr = ptr; 1.676 + return XML_TOK_INVALID; 1.677 + } 1.678 + } 1.679 + return XML_TOK_PARTIAL; 1.680 +} 1.681 + 1.682 +/* ptr points to character following "<" */ 1.683 + 1.684 +static int PTRCALL 1.685 +PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, 1.686 + const char **nextTokPtr) 1.687 +{ 1.688 +#ifdef XML_NS 1.689 + int hadColon; 1.690 +#endif 1.691 + if (ptr == end) 1.692 + return XML_TOK_PARTIAL; 1.693 + switch (BYTE_TYPE(enc, ptr)) { 1.694 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1.695 + case BT_EXCL: 1.696 + if ((ptr += MINBPC(enc)) == end) 1.697 + return XML_TOK_PARTIAL; 1.698 + switch (BYTE_TYPE(enc, ptr)) { 1.699 + case BT_MINUS: 1.700 + return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.701 + case BT_LSQB: 1.702 + return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), 1.703 + end, nextTokPtr); 1.704 + } 1.705 + *nextTokPtr = ptr; 1.706 + return XML_TOK_INVALID; 1.707 + case BT_QUEST: 1.708 + return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.709 + case BT_SOL: 1.710 + return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.711 + default: 1.712 + *nextTokPtr = ptr; 1.713 + return XML_TOK_INVALID; 1.714 + } 1.715 +#ifdef XML_NS 1.716 + hadColon = 0; 1.717 +#endif 1.718 + /* we have a start-tag */ 1.719 + while (ptr != end) { 1.720 + switch (BYTE_TYPE(enc, ptr)) { 1.721 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1.722 +#ifdef XML_NS 1.723 + case BT_COLON: 1.724 + if (hadColon) { 1.725 + *nextTokPtr = ptr; 1.726 + return XML_TOK_INVALID; 1.727 + } 1.728 + hadColon = 1; 1.729 + ptr += MINBPC(enc); 1.730 + if (ptr == end) 1.731 + return XML_TOK_PARTIAL; 1.732 + switch (BYTE_TYPE(enc, ptr)) { 1.733 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1.734 + default: 1.735 + *nextTokPtr = ptr; 1.736 + return XML_TOK_INVALID; 1.737 + } 1.738 + break; 1.739 +#endif 1.740 + case BT_S: case BT_CR: case BT_LF: 1.741 + { 1.742 + ptr += MINBPC(enc); 1.743 + while (ptr != end) { 1.744 + switch (BYTE_TYPE(enc, ptr)) { 1.745 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1.746 + case BT_GT: 1.747 + goto gt; 1.748 + case BT_SOL: 1.749 + goto sol; 1.750 + case BT_S: case BT_CR: case BT_LF: 1.751 + ptr += MINBPC(enc); 1.752 + continue; 1.753 + default: 1.754 + *nextTokPtr = ptr; 1.755 + return XML_TOK_INVALID; 1.756 + } 1.757 + return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); 1.758 + } 1.759 + return XML_TOK_PARTIAL; 1.760 + } 1.761 + case BT_GT: 1.762 + gt: 1.763 + *nextTokPtr = ptr + MINBPC(enc); 1.764 + return XML_TOK_START_TAG_NO_ATTS; 1.765 + case BT_SOL: 1.766 + sol: 1.767 + ptr += MINBPC(enc); 1.768 + if (ptr == end) 1.769 + return XML_TOK_PARTIAL; 1.770 + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1.771 + *nextTokPtr = ptr; 1.772 + return XML_TOK_INVALID; 1.773 + } 1.774 + *nextTokPtr = ptr + MINBPC(enc); 1.775 + return XML_TOK_EMPTY_ELEMENT_NO_ATTS; 1.776 + default: 1.777 + *nextTokPtr = ptr; 1.778 + return XML_TOK_INVALID; 1.779 + } 1.780 + } 1.781 + return XML_TOK_PARTIAL; 1.782 +} 1.783 + 1.784 +static int PTRCALL 1.785 +PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, 1.786 + const char **nextTokPtr) 1.787 +{ 1.788 + if (ptr == end) 1.789 + return XML_TOK_NONE; 1.790 + if (MINBPC(enc) > 1) { 1.791 + size_t n = end - ptr; 1.792 + if (n & (MINBPC(enc) - 1)) { 1.793 + n &= ~(MINBPC(enc) - 1); 1.794 + if (n == 0) 1.795 + return XML_TOK_PARTIAL; 1.796 + end = ptr + n; 1.797 + } 1.798 + } 1.799 + switch (BYTE_TYPE(enc, ptr)) { 1.800 + case BT_LT: 1.801 + return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.802 + case BT_AMP: 1.803 + return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.804 + case BT_CR: 1.805 + ptr += MINBPC(enc); 1.806 + if (ptr == end) 1.807 + return XML_TOK_TRAILING_CR; 1.808 + if (BYTE_TYPE(enc, ptr) == BT_LF) 1.809 + ptr += MINBPC(enc); 1.810 + *nextTokPtr = ptr; 1.811 + return XML_TOK_DATA_NEWLINE; 1.812 + case BT_LF: 1.813 + *nextTokPtr = ptr + MINBPC(enc); 1.814 + return XML_TOK_DATA_NEWLINE; 1.815 + case BT_RSQB: 1.816 + ptr += MINBPC(enc); 1.817 + if (ptr == end) 1.818 + return XML_TOK_TRAILING_RSQB; 1.819 + if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 1.820 + break; 1.821 + ptr += MINBPC(enc); 1.822 + if (ptr == end) 1.823 + return XML_TOK_TRAILING_RSQB; 1.824 + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1.825 + ptr -= MINBPC(enc); 1.826 + break; 1.827 + } 1.828 + *nextTokPtr = ptr; 1.829 + return XML_TOK_INVALID; 1.830 + INVALID_CASES(ptr, nextTokPtr) 1.831 + default: 1.832 + ptr += MINBPC(enc); 1.833 + break; 1.834 + } 1.835 + while (ptr != end) { 1.836 + switch (BYTE_TYPE(enc, ptr)) { 1.837 +#define LEAD_CASE(n) \ 1.838 + case BT_LEAD ## n: \ 1.839 + if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 1.840 + *nextTokPtr = ptr; \ 1.841 + return XML_TOK_DATA_CHARS; \ 1.842 + } \ 1.843 + ptr += n; \ 1.844 + break; 1.845 + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1.846 +#undef LEAD_CASE 1.847 + case BT_RSQB: 1.848 + if (ptr + MINBPC(enc) != end) { 1.849 + if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { 1.850 + ptr += MINBPC(enc); 1.851 + break; 1.852 + } 1.853 + if (ptr + 2*MINBPC(enc) != end) { 1.854 + if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { 1.855 + ptr += MINBPC(enc); 1.856 + break; 1.857 + } 1.858 + *nextTokPtr = ptr + 2*MINBPC(enc); 1.859 + return XML_TOK_INVALID; 1.860 + } 1.861 + } 1.862 + /* fall through */ 1.863 + case BT_AMP: 1.864 + case BT_LT: 1.865 + case BT_NONXML: 1.866 + case BT_MALFORM: 1.867 + case BT_TRAIL: 1.868 + case BT_CR: 1.869 + case BT_LF: 1.870 + *nextTokPtr = ptr; 1.871 + return XML_TOK_DATA_CHARS; 1.872 + default: 1.873 + ptr += MINBPC(enc); 1.874 + break; 1.875 + } 1.876 + } 1.877 + *nextTokPtr = ptr; 1.878 + return XML_TOK_DATA_CHARS; 1.879 +} 1.880 + 1.881 +/* ptr points to character following "%" */ 1.882 + 1.883 +static int PTRCALL 1.884 +PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, 1.885 + const char **nextTokPtr) 1.886 +{ 1.887 + if (ptr == end) 1.888 + return -XML_TOK_PERCENT; 1.889 + switch (BYTE_TYPE(enc, ptr)) { 1.890 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1.891 + case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: 1.892 + *nextTokPtr = ptr; 1.893 + return XML_TOK_PERCENT; 1.894 + default: 1.895 + *nextTokPtr = ptr; 1.896 + return XML_TOK_INVALID; 1.897 + } 1.898 + while (ptr != end) { 1.899 + switch (BYTE_TYPE(enc, ptr)) { 1.900 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1.901 + case BT_SEMI: 1.902 + *nextTokPtr = ptr + MINBPC(enc); 1.903 + return XML_TOK_PARAM_ENTITY_REF; 1.904 + default: 1.905 + *nextTokPtr = ptr; 1.906 + return XML_TOK_INVALID; 1.907 + } 1.908 + } 1.909 + return XML_TOK_PARTIAL; 1.910 +} 1.911 + 1.912 +static int PTRCALL 1.913 +PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, 1.914 + const char **nextTokPtr) 1.915 +{ 1.916 + if (ptr == end) 1.917 + return XML_TOK_PARTIAL; 1.918 + switch (BYTE_TYPE(enc, ptr)) { 1.919 + CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1.920 + default: 1.921 + *nextTokPtr = ptr; 1.922 + return XML_TOK_INVALID; 1.923 + } 1.924 + while (ptr != end) { 1.925 + switch (BYTE_TYPE(enc, ptr)) { 1.926 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1.927 + case BT_CR: case BT_LF: case BT_S: 1.928 + case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: 1.929 + *nextTokPtr = ptr; 1.930 + return XML_TOK_POUND_NAME; 1.931 + default: 1.932 + *nextTokPtr = ptr; 1.933 + return XML_TOK_INVALID; 1.934 + } 1.935 + } 1.936 + return -XML_TOK_POUND_NAME; 1.937 +} 1.938 + 1.939 +static int PTRCALL 1.940 +PREFIX(scanLit)(int open, const ENCODING *enc, 1.941 + const char *ptr, const char *end, 1.942 + const char **nextTokPtr) 1.943 +{ 1.944 + while (ptr != end) { 1.945 + int t = BYTE_TYPE(enc, ptr); 1.946 + switch (t) { 1.947 + INVALID_CASES(ptr, nextTokPtr) 1.948 + case BT_QUOT: 1.949 + case BT_APOS: 1.950 + ptr += MINBPC(enc); 1.951 + if (t != open) 1.952 + break; 1.953 + if (ptr == end) 1.954 + return -XML_TOK_LITERAL; 1.955 + *nextTokPtr = ptr; 1.956 + switch (BYTE_TYPE(enc, ptr)) { 1.957 + case BT_S: case BT_CR: case BT_LF: 1.958 + case BT_GT: case BT_PERCNT: case BT_LSQB: 1.959 + return XML_TOK_LITERAL; 1.960 + default: 1.961 + return XML_TOK_INVALID; 1.962 + } 1.963 + default: 1.964 + ptr += MINBPC(enc); 1.965 + break; 1.966 + } 1.967 + } 1.968 + return XML_TOK_PARTIAL; 1.969 +} 1.970 + 1.971 +static int PTRCALL 1.972 +PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, 1.973 + const char **nextTokPtr) 1.974 +{ 1.975 + int tok; 1.976 + if (ptr == end) 1.977 + return XML_TOK_NONE; 1.978 + if (MINBPC(enc) > 1) { 1.979 + size_t n = end - ptr; 1.980 + if (n & (MINBPC(enc) - 1)) { 1.981 + n &= ~(MINBPC(enc) - 1); 1.982 + if (n == 0) 1.983 + return XML_TOK_PARTIAL; 1.984 + end = ptr + n; 1.985 + } 1.986 + } 1.987 + switch (BYTE_TYPE(enc, ptr)) { 1.988 + case BT_QUOT: 1.989 + return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); 1.990 + case BT_APOS: 1.991 + return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); 1.992 + case BT_LT: 1.993 + { 1.994 + ptr += MINBPC(enc); 1.995 + if (ptr == end) 1.996 + return XML_TOK_PARTIAL; 1.997 + switch (BYTE_TYPE(enc, ptr)) { 1.998 + case BT_EXCL: 1.999 + return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.1000 + case BT_QUEST: 1.1001 + return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.1002 + case BT_NMSTRT: 1.1003 + case BT_HEX: 1.1004 + case BT_NONASCII: 1.1005 + case BT_LEAD2: 1.1006 + case BT_LEAD3: 1.1007 + case BT_LEAD4: 1.1008 + *nextTokPtr = ptr - MINBPC(enc); 1.1009 + return XML_TOK_INSTANCE_START; 1.1010 + } 1.1011 + *nextTokPtr = ptr; 1.1012 + return XML_TOK_INVALID; 1.1013 + } 1.1014 + case BT_CR: 1.1015 + if (ptr + MINBPC(enc) == end) { 1.1016 + *nextTokPtr = end; 1.1017 + /* indicate that this might be part of a CR/LF pair */ 1.1018 + return -XML_TOK_PROLOG_S; 1.1019 + } 1.1020 + /* fall through */ 1.1021 + case BT_S: case BT_LF: 1.1022 + for (;;) { 1.1023 + ptr += MINBPC(enc); 1.1024 + if (ptr == end) 1.1025 + break; 1.1026 + switch (BYTE_TYPE(enc, ptr)) { 1.1027 + case BT_S: case BT_LF: 1.1028 + break; 1.1029 + case BT_CR: 1.1030 + /* don't split CR/LF pair */ 1.1031 + if (ptr + MINBPC(enc) != end) 1.1032 + break; 1.1033 + /* fall through */ 1.1034 + default: 1.1035 + *nextTokPtr = ptr; 1.1036 + return XML_TOK_PROLOG_S; 1.1037 + } 1.1038 + } 1.1039 + *nextTokPtr = ptr; 1.1040 + return XML_TOK_PROLOG_S; 1.1041 + case BT_PERCNT: 1.1042 + return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.1043 + case BT_COMMA: 1.1044 + *nextTokPtr = ptr + MINBPC(enc); 1.1045 + return XML_TOK_COMMA; 1.1046 + case BT_LSQB: 1.1047 + *nextTokPtr = ptr + MINBPC(enc); 1.1048 + return XML_TOK_OPEN_BRACKET; 1.1049 + case BT_RSQB: 1.1050 + ptr += MINBPC(enc); 1.1051 + if (ptr == end) 1.1052 + return -XML_TOK_CLOSE_BRACKET; 1.1053 + if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1.1054 + if (ptr + MINBPC(enc) == end) 1.1055 + return XML_TOK_PARTIAL; 1.1056 + if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { 1.1057 + *nextTokPtr = ptr + 2*MINBPC(enc); 1.1058 + return XML_TOK_COND_SECT_CLOSE; 1.1059 + } 1.1060 + } 1.1061 + *nextTokPtr = ptr; 1.1062 + return XML_TOK_CLOSE_BRACKET; 1.1063 + case BT_LPAR: 1.1064 + *nextTokPtr = ptr + MINBPC(enc); 1.1065 + return XML_TOK_OPEN_PAREN; 1.1066 + case BT_RPAR: 1.1067 + ptr += MINBPC(enc); 1.1068 + if (ptr == end) 1.1069 + return -XML_TOK_CLOSE_PAREN; 1.1070 + switch (BYTE_TYPE(enc, ptr)) { 1.1071 + case BT_AST: 1.1072 + *nextTokPtr = ptr + MINBPC(enc); 1.1073 + return XML_TOK_CLOSE_PAREN_ASTERISK; 1.1074 + case BT_QUEST: 1.1075 + *nextTokPtr = ptr + MINBPC(enc); 1.1076 + return XML_TOK_CLOSE_PAREN_QUESTION; 1.1077 + case BT_PLUS: 1.1078 + *nextTokPtr = ptr + MINBPC(enc); 1.1079 + return XML_TOK_CLOSE_PAREN_PLUS; 1.1080 + case BT_CR: case BT_LF: case BT_S: 1.1081 + case BT_GT: case BT_COMMA: case BT_VERBAR: 1.1082 + case BT_RPAR: 1.1083 + *nextTokPtr = ptr; 1.1084 + return XML_TOK_CLOSE_PAREN; 1.1085 + } 1.1086 + *nextTokPtr = ptr; 1.1087 + return XML_TOK_INVALID; 1.1088 + case BT_VERBAR: 1.1089 + *nextTokPtr = ptr + MINBPC(enc); 1.1090 + return XML_TOK_OR; 1.1091 + case BT_GT: 1.1092 + *nextTokPtr = ptr + MINBPC(enc); 1.1093 + return XML_TOK_DECL_CLOSE; 1.1094 + case BT_NUM: 1.1095 + return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.1096 +#define LEAD_CASE(n) \ 1.1097 + case BT_LEAD ## n: \ 1.1098 + if (end - ptr < n) \ 1.1099 + return XML_TOK_PARTIAL_CHAR; \ 1.1100 + if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ 1.1101 + ptr += n; \ 1.1102 + tok = XML_TOK_NAME; \ 1.1103 + break; \ 1.1104 + } \ 1.1105 + if (IS_NAME_CHAR(enc, ptr, n)) { \ 1.1106 + ptr += n; \ 1.1107 + tok = XML_TOK_NMTOKEN; \ 1.1108 + break; \ 1.1109 + } \ 1.1110 + *nextTokPtr = ptr; \ 1.1111 + return XML_TOK_INVALID; 1.1112 + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1.1113 +#undef LEAD_CASE 1.1114 + case BT_NMSTRT: 1.1115 + case BT_HEX: 1.1116 + tok = XML_TOK_NAME; 1.1117 + ptr += MINBPC(enc); 1.1118 + break; 1.1119 + case BT_DIGIT: 1.1120 + case BT_NAME: 1.1121 + case BT_MINUS: 1.1122 +#ifdef XML_NS 1.1123 + case BT_COLON: 1.1124 +#endif 1.1125 + tok = XML_TOK_NMTOKEN; 1.1126 + ptr += MINBPC(enc); 1.1127 + break; 1.1128 + case BT_NONASCII: 1.1129 + if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { 1.1130 + ptr += MINBPC(enc); 1.1131 + tok = XML_TOK_NAME; 1.1132 + break; 1.1133 + } 1.1134 + if (IS_NAME_CHAR_MINBPC(enc, ptr)) { 1.1135 + ptr += MINBPC(enc); 1.1136 + tok = XML_TOK_NMTOKEN; 1.1137 + break; 1.1138 + } 1.1139 + /* fall through */ 1.1140 + default: 1.1141 + *nextTokPtr = ptr; 1.1142 + return XML_TOK_INVALID; 1.1143 + } 1.1144 + while (ptr != end) { 1.1145 + switch (BYTE_TYPE(enc, ptr)) { 1.1146 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1.1147 + case BT_GT: case BT_RPAR: case BT_COMMA: 1.1148 + case BT_VERBAR: case BT_LSQB: case BT_PERCNT: 1.1149 + case BT_S: case BT_CR: case BT_LF: 1.1150 + *nextTokPtr = ptr; 1.1151 + return tok; 1.1152 +#ifdef XML_NS 1.1153 + case BT_COLON: 1.1154 + ptr += MINBPC(enc); 1.1155 + switch (tok) { 1.1156 + case XML_TOK_NAME: 1.1157 + if (ptr == end) 1.1158 + return XML_TOK_PARTIAL; 1.1159 + tok = XML_TOK_PREFIXED_NAME; 1.1160 + switch (BYTE_TYPE(enc, ptr)) { 1.1161 + CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1.1162 + default: 1.1163 + tok = XML_TOK_NMTOKEN; 1.1164 + break; 1.1165 + } 1.1166 + break; 1.1167 + case XML_TOK_PREFIXED_NAME: 1.1168 + tok = XML_TOK_NMTOKEN; 1.1169 + break; 1.1170 + } 1.1171 + break; 1.1172 +#endif 1.1173 + case BT_PLUS: 1.1174 + if (tok == XML_TOK_NMTOKEN) { 1.1175 + *nextTokPtr = ptr; 1.1176 + return XML_TOK_INVALID; 1.1177 + } 1.1178 + *nextTokPtr = ptr + MINBPC(enc); 1.1179 + return XML_TOK_NAME_PLUS; 1.1180 + case BT_AST: 1.1181 + if (tok == XML_TOK_NMTOKEN) { 1.1182 + *nextTokPtr = ptr; 1.1183 + return XML_TOK_INVALID; 1.1184 + } 1.1185 + *nextTokPtr = ptr + MINBPC(enc); 1.1186 + return XML_TOK_NAME_ASTERISK; 1.1187 + case BT_QUEST: 1.1188 + if (tok == XML_TOK_NMTOKEN) { 1.1189 + *nextTokPtr = ptr; 1.1190 + return XML_TOK_INVALID; 1.1191 + } 1.1192 + *nextTokPtr = ptr + MINBPC(enc); 1.1193 + return XML_TOK_NAME_QUESTION; 1.1194 + default: 1.1195 + *nextTokPtr = ptr; 1.1196 + return XML_TOK_INVALID; 1.1197 + } 1.1198 + } 1.1199 + return -tok; 1.1200 +} 1.1201 + 1.1202 +static int PTRCALL 1.1203 +PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, 1.1204 + const char *end, const char **nextTokPtr) 1.1205 +{ 1.1206 + const char *start; 1.1207 + if (ptr == end) 1.1208 + return XML_TOK_NONE; 1.1209 + start = ptr; 1.1210 + while (ptr != end) { 1.1211 + switch (BYTE_TYPE(enc, ptr)) { 1.1212 +#define LEAD_CASE(n) \ 1.1213 + case BT_LEAD ## n: ptr += n; break; 1.1214 + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1.1215 +#undef LEAD_CASE 1.1216 + case BT_AMP: 1.1217 + if (ptr == start) 1.1218 + return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.1219 + *nextTokPtr = ptr; 1.1220 + return XML_TOK_DATA_CHARS; 1.1221 + case BT_LT: 1.1222 + /* this is for inside entity references */ 1.1223 + *nextTokPtr = ptr; 1.1224 + return XML_TOK_INVALID; 1.1225 + case BT_LF: 1.1226 + if (ptr == start) { 1.1227 + *nextTokPtr = ptr + MINBPC(enc); 1.1228 + return XML_TOK_DATA_NEWLINE; 1.1229 + } 1.1230 + *nextTokPtr = ptr; 1.1231 + return XML_TOK_DATA_CHARS; 1.1232 + case BT_CR: 1.1233 + if (ptr == start) { 1.1234 + ptr += MINBPC(enc); 1.1235 + if (ptr == end) 1.1236 + return XML_TOK_TRAILING_CR; 1.1237 + if (BYTE_TYPE(enc, ptr) == BT_LF) 1.1238 + ptr += MINBPC(enc); 1.1239 + *nextTokPtr = ptr; 1.1240 + return XML_TOK_DATA_NEWLINE; 1.1241 + } 1.1242 + *nextTokPtr = ptr; 1.1243 + return XML_TOK_DATA_CHARS; 1.1244 + case BT_S: 1.1245 + if (ptr == start) { 1.1246 + *nextTokPtr = ptr + MINBPC(enc); 1.1247 + return XML_TOK_ATTRIBUTE_VALUE_S; 1.1248 + } 1.1249 + *nextTokPtr = ptr; 1.1250 + return XML_TOK_DATA_CHARS; 1.1251 + default: 1.1252 + ptr += MINBPC(enc); 1.1253 + break; 1.1254 + } 1.1255 + } 1.1256 + *nextTokPtr = ptr; 1.1257 + return XML_TOK_DATA_CHARS; 1.1258 +} 1.1259 + 1.1260 +static int PTRCALL 1.1261 +PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, 1.1262 + const char *end, const char **nextTokPtr) 1.1263 +{ 1.1264 + const char *start; 1.1265 + if (ptr == end) 1.1266 + return XML_TOK_NONE; 1.1267 + start = ptr; 1.1268 + while (ptr != end) { 1.1269 + switch (BYTE_TYPE(enc, ptr)) { 1.1270 +#define LEAD_CASE(n) \ 1.1271 + case BT_LEAD ## n: ptr += n; break; 1.1272 + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1.1273 +#undef LEAD_CASE 1.1274 + case BT_AMP: 1.1275 + if (ptr == start) 1.1276 + return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1.1277 + *nextTokPtr = ptr; 1.1278 + return XML_TOK_DATA_CHARS; 1.1279 + case BT_PERCNT: 1.1280 + if (ptr == start) { 1.1281 + int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), 1.1282 + end, nextTokPtr); 1.1283 + return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; 1.1284 + } 1.1285 + *nextTokPtr = ptr; 1.1286 + return XML_TOK_DATA_CHARS; 1.1287 + case BT_LF: 1.1288 + if (ptr == start) { 1.1289 + *nextTokPtr = ptr + MINBPC(enc); 1.1290 + return XML_TOK_DATA_NEWLINE; 1.1291 + } 1.1292 + *nextTokPtr = ptr; 1.1293 + return XML_TOK_DATA_CHARS; 1.1294 + case BT_CR: 1.1295 + if (ptr == start) { 1.1296 + ptr += MINBPC(enc); 1.1297 + if (ptr == end) 1.1298 + return XML_TOK_TRAILING_CR; 1.1299 + if (BYTE_TYPE(enc, ptr) == BT_LF) 1.1300 + ptr += MINBPC(enc); 1.1301 + *nextTokPtr = ptr; 1.1302 + return XML_TOK_DATA_NEWLINE; 1.1303 + } 1.1304 + *nextTokPtr = ptr; 1.1305 + return XML_TOK_DATA_CHARS; 1.1306 + default: 1.1307 + ptr += MINBPC(enc); 1.1308 + break; 1.1309 + } 1.1310 + } 1.1311 + *nextTokPtr = ptr; 1.1312 + return XML_TOK_DATA_CHARS; 1.1313 +} 1.1314 + 1.1315 +#ifdef XML_DTD 1.1316 + 1.1317 +static int PTRCALL 1.1318 +PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, 1.1319 + const char *end, const char **nextTokPtr) 1.1320 +{ 1.1321 + int level = 0; 1.1322 + if (MINBPC(enc) > 1) { 1.1323 + size_t n = end - ptr; 1.1324 + if (n & (MINBPC(enc) - 1)) { 1.1325 + n &= ~(MINBPC(enc) - 1); 1.1326 + end = ptr + n; 1.1327 + } 1.1328 + } 1.1329 + while (ptr != end) { 1.1330 + switch (BYTE_TYPE(enc, ptr)) { 1.1331 + INVALID_CASES(ptr, nextTokPtr) 1.1332 + case BT_LT: 1.1333 + if ((ptr += MINBPC(enc)) == end) 1.1334 + return XML_TOK_PARTIAL; 1.1335 + if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { 1.1336 + if ((ptr += MINBPC(enc)) == end) 1.1337 + return XML_TOK_PARTIAL; 1.1338 + if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { 1.1339 + ++level; 1.1340 + ptr += MINBPC(enc); 1.1341 + } 1.1342 + } 1.1343 + break; 1.1344 + case BT_RSQB: 1.1345 + if ((ptr += MINBPC(enc)) == end) 1.1346 + return XML_TOK_PARTIAL; 1.1347 + if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1.1348 + if ((ptr += MINBPC(enc)) == end) 1.1349 + return XML_TOK_PARTIAL; 1.1350 + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1.1351 + ptr += MINBPC(enc); 1.1352 + if (level == 0) { 1.1353 + *nextTokPtr = ptr; 1.1354 + return XML_TOK_IGNORE_SECT; 1.1355 + } 1.1356 + --level; 1.1357 + } 1.1358 + } 1.1359 + break; 1.1360 + default: 1.1361 + ptr += MINBPC(enc); 1.1362 + break; 1.1363 + } 1.1364 + } 1.1365 + return XML_TOK_PARTIAL; 1.1366 +} 1.1367 + 1.1368 +#endif /* XML_DTD */ 1.1369 + 1.1370 +static int PTRCALL 1.1371 +PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, 1.1372 + const char **badPtr) 1.1373 +{ 1.1374 + ptr += MINBPC(enc); 1.1375 + end -= MINBPC(enc); 1.1376 + for (; ptr != end; ptr += MINBPC(enc)) { 1.1377 + switch (BYTE_TYPE(enc, ptr)) { 1.1378 + case BT_DIGIT: 1.1379 + case BT_HEX: 1.1380 + case BT_MINUS: 1.1381 + case BT_APOS: 1.1382 + case BT_LPAR: 1.1383 + case BT_RPAR: 1.1384 + case BT_PLUS: 1.1385 + case BT_COMMA: 1.1386 + case BT_SOL: 1.1387 + case BT_EQUALS: 1.1388 + case BT_QUEST: 1.1389 + case BT_CR: 1.1390 + case BT_LF: 1.1391 + case BT_SEMI: 1.1392 + case BT_EXCL: 1.1393 + case BT_AST: 1.1394 + case BT_PERCNT: 1.1395 + case BT_NUM: 1.1396 +#ifdef XML_NS 1.1397 + case BT_COLON: 1.1398 +#endif 1.1399 + break; 1.1400 + case BT_S: 1.1401 + if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { 1.1402 + *badPtr = ptr; 1.1403 + return 0; 1.1404 + } 1.1405 + break; 1.1406 + case BT_NAME: 1.1407 + case BT_NMSTRT: 1.1408 + if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) 1.1409 + break; 1.1410 + default: 1.1411 + switch (BYTE_TO_ASCII(enc, ptr)) { 1.1412 + case 0x24: /* $ */ 1.1413 + case 0x40: /* @ */ 1.1414 + break; 1.1415 + default: 1.1416 + *badPtr = ptr; 1.1417 + return 0; 1.1418 + } 1.1419 + break; 1.1420 + } 1.1421 + } 1.1422 + return 1; 1.1423 +} 1.1424 + 1.1425 +/* This must only be called for a well-formed start-tag or empty 1.1426 + element tag. Returns the number of attributes. Pointers to the 1.1427 + first attsMax attributes are stored in atts. 1.1428 +*/ 1.1429 + 1.1430 +static int PTRCALL 1.1431 +PREFIX(getAtts)(const ENCODING *enc, const char *ptr, 1.1432 + int attsMax, ATTRIBUTE *atts) 1.1433 +{ 1.1434 + enum { other, inName, inValue } state = inName; 1.1435 + int nAtts = 0; 1.1436 + int open = 0; /* defined when state == inValue; 1.1437 + initialization just to shut up compilers */ 1.1438 + 1.1439 + for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { 1.1440 + switch (BYTE_TYPE(enc, ptr)) { 1.1441 +#define START_NAME \ 1.1442 + if (state == other) { \ 1.1443 + if (nAtts < attsMax) { \ 1.1444 + atts[nAtts].name = ptr; \ 1.1445 + atts[nAtts].normalized = 1; \ 1.1446 + } \ 1.1447 + state = inName; \ 1.1448 + } 1.1449 +#define LEAD_CASE(n) \ 1.1450 + case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; 1.1451 + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1.1452 +#undef LEAD_CASE 1.1453 + case BT_NONASCII: 1.1454 + case BT_NMSTRT: 1.1455 + case BT_HEX: 1.1456 + START_NAME 1.1457 + break; 1.1458 +#undef START_NAME 1.1459 + case BT_QUOT: 1.1460 + if (state != inValue) { 1.1461 + if (nAtts < attsMax) 1.1462 + atts[nAtts].valuePtr = ptr + MINBPC(enc); 1.1463 + state = inValue; 1.1464 + open = BT_QUOT; 1.1465 + } 1.1466 + else if (open == BT_QUOT) { 1.1467 + state = other; 1.1468 + if (nAtts < attsMax) 1.1469 + atts[nAtts].valueEnd = ptr; 1.1470 + nAtts++; 1.1471 + } 1.1472 + break; 1.1473 + case BT_APOS: 1.1474 + if (state != inValue) { 1.1475 + if (nAtts < attsMax) 1.1476 + atts[nAtts].valuePtr = ptr + MINBPC(enc); 1.1477 + state = inValue; 1.1478 + open = BT_APOS; 1.1479 + } 1.1480 + else if (open == BT_APOS) { 1.1481 + state = other; 1.1482 + if (nAtts < attsMax) 1.1483 + atts[nAtts].valueEnd = ptr; 1.1484 + nAtts++; 1.1485 + } 1.1486 + break; 1.1487 + case BT_AMP: 1.1488 + if (nAtts < attsMax) 1.1489 + atts[nAtts].normalized = 0; 1.1490 + break; 1.1491 + case BT_S: 1.1492 + if (state == inName) 1.1493 + state = other; 1.1494 + else if (state == inValue 1.1495 + && nAtts < attsMax 1.1496 + && atts[nAtts].normalized 1.1497 + && (ptr == atts[nAtts].valuePtr 1.1498 + || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE 1.1499 + || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE 1.1500 + || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) 1.1501 + atts[nAtts].normalized = 0; 1.1502 + break; 1.1503 + case BT_CR: case BT_LF: 1.1504 + /* This case ensures that the first attribute name is counted 1.1505 + Apart from that we could just change state on the quote. */ 1.1506 + if (state == inName) 1.1507 + state = other; 1.1508 + else if (state == inValue && nAtts < attsMax) 1.1509 + atts[nAtts].normalized = 0; 1.1510 + break; 1.1511 + case BT_GT: 1.1512 + case BT_SOL: 1.1513 + if (state != inValue) 1.1514 + return nAtts; 1.1515 + break; 1.1516 + default: 1.1517 + break; 1.1518 + } 1.1519 + } 1.1520 + /* not reached */ 1.1521 +} 1.1522 + 1.1523 +static int PTRFASTCALL 1.1524 +PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) 1.1525 +{ 1.1526 + int result = 0; 1.1527 + /* skip &# */ 1.1528 + ptr += 2*MINBPC(enc); 1.1529 + if (CHAR_MATCHES(enc, ptr, ASCII_x)) { 1.1530 + for (ptr += MINBPC(enc); 1.1531 + !CHAR_MATCHES(enc, ptr, ASCII_SEMI); 1.1532 + ptr += MINBPC(enc)) { 1.1533 + int c = BYTE_TO_ASCII(enc, ptr); 1.1534 + switch (c) { 1.1535 + case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: 1.1536 + case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: 1.1537 + result <<= 4; 1.1538 + result |= (c - ASCII_0); 1.1539 + break; 1.1540 + case ASCII_A: case ASCII_B: case ASCII_C: 1.1541 + case ASCII_D: case ASCII_E: case ASCII_F: 1.1542 + result <<= 4; 1.1543 + result += 10 + (c - ASCII_A); 1.1544 + break; 1.1545 + case ASCII_a: case ASCII_b: case ASCII_c: 1.1546 + case ASCII_d: case ASCII_e: case ASCII_f: 1.1547 + result <<= 4; 1.1548 + result += 10 + (c - ASCII_a); 1.1549 + break; 1.1550 + } 1.1551 + if (result >= 0x110000) 1.1552 + return -1; 1.1553 + } 1.1554 + } 1.1555 + else { 1.1556 + for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { 1.1557 + int c = BYTE_TO_ASCII(enc, ptr); 1.1558 + result *= 10; 1.1559 + result += (c - ASCII_0); 1.1560 + if (result >= 0x110000) 1.1561 + return -1; 1.1562 + } 1.1563 + } 1.1564 + return checkCharRefNumber(result); 1.1565 +} 1.1566 + 1.1567 +static int PTRCALL 1.1568 +PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, 1.1569 + const char *end) 1.1570 +{ 1.1571 + switch ((end - ptr)/MINBPC(enc)) { 1.1572 + case 2: 1.1573 + if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { 1.1574 + switch (BYTE_TO_ASCII(enc, ptr)) { 1.1575 + case ASCII_l: 1.1576 + return ASCII_LT; 1.1577 + case ASCII_g: 1.1578 + return ASCII_GT; 1.1579 + } 1.1580 + } 1.1581 + break; 1.1582 + case 3: 1.1583 + if (CHAR_MATCHES(enc, ptr, ASCII_a)) { 1.1584 + ptr += MINBPC(enc); 1.1585 + if (CHAR_MATCHES(enc, ptr, ASCII_m)) { 1.1586 + ptr += MINBPC(enc); 1.1587 + if (CHAR_MATCHES(enc, ptr, ASCII_p)) 1.1588 + return ASCII_AMP; 1.1589 + } 1.1590 + } 1.1591 + break; 1.1592 + case 4: 1.1593 + switch (BYTE_TO_ASCII(enc, ptr)) { 1.1594 + case ASCII_q: 1.1595 + ptr += MINBPC(enc); 1.1596 + if (CHAR_MATCHES(enc, ptr, ASCII_u)) { 1.1597 + ptr += MINBPC(enc); 1.1598 + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 1.1599 + ptr += MINBPC(enc); 1.1600 + if (CHAR_MATCHES(enc, ptr, ASCII_t)) 1.1601 + return ASCII_QUOT; 1.1602 + } 1.1603 + } 1.1604 + break; 1.1605 + case ASCII_a: 1.1606 + ptr += MINBPC(enc); 1.1607 + if (CHAR_MATCHES(enc, ptr, ASCII_p)) { 1.1608 + ptr += MINBPC(enc); 1.1609 + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 1.1610 + ptr += MINBPC(enc); 1.1611 + if (CHAR_MATCHES(enc, ptr, ASCII_s)) 1.1612 + return ASCII_APOS; 1.1613 + } 1.1614 + } 1.1615 + break; 1.1616 + } 1.1617 + } 1.1618 + return 0; 1.1619 +} 1.1620 + 1.1621 +static int PTRCALL 1.1622 +PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) 1.1623 +{ 1.1624 + for (;;) { 1.1625 + switch (BYTE_TYPE(enc, ptr1)) { 1.1626 +#define LEAD_CASE(n) \ 1.1627 + case BT_LEAD ## n: \ 1.1628 + if (*ptr1++ != *ptr2++) \ 1.1629 + return 0; 1.1630 + LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) 1.1631 +#undef LEAD_CASE 1.1632 + /* fall through */ 1.1633 + if (*ptr1++ != *ptr2++) 1.1634 + return 0; 1.1635 + break; 1.1636 + case BT_NONASCII: 1.1637 + case BT_NMSTRT: 1.1638 +#ifdef XML_NS 1.1639 + case BT_COLON: 1.1640 +#endif 1.1641 + case BT_HEX: 1.1642 + case BT_DIGIT: 1.1643 + case BT_NAME: 1.1644 + case BT_MINUS: 1.1645 + if (*ptr2++ != *ptr1++) 1.1646 + return 0; 1.1647 + if (MINBPC(enc) > 1) { 1.1648 + if (*ptr2++ != *ptr1++) 1.1649 + return 0; 1.1650 + if (MINBPC(enc) > 2) { 1.1651 + if (*ptr2++ != *ptr1++) 1.1652 + return 0; 1.1653 + if (MINBPC(enc) > 3) { 1.1654 + if (*ptr2++ != *ptr1++) 1.1655 + return 0; 1.1656 + } 1.1657 + } 1.1658 + } 1.1659 + break; 1.1660 + default: 1.1661 + if (MINBPC(enc) == 1 && *ptr1 == *ptr2) 1.1662 + return 1; 1.1663 + switch (BYTE_TYPE(enc, ptr2)) { 1.1664 + case BT_LEAD2: 1.1665 + case BT_LEAD3: 1.1666 + case BT_LEAD4: 1.1667 + case BT_NONASCII: 1.1668 + case BT_NMSTRT: 1.1669 +#ifdef XML_NS 1.1670 + case BT_COLON: 1.1671 +#endif 1.1672 + case BT_HEX: 1.1673 + case BT_DIGIT: 1.1674 + case BT_NAME: 1.1675 + case BT_MINUS: 1.1676 + return 0; 1.1677 + default: 1.1678 + return 1; 1.1679 + } 1.1680 + } 1.1681 + } 1.1682 + /* not reached */ 1.1683 +} 1.1684 + 1.1685 +static int PTRCALL 1.1686 +PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, 1.1687 + const char *end1, const char *ptr2) 1.1688 +{ 1.1689 + for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { 1.1690 + if (ptr1 == end1) 1.1691 + return 0; 1.1692 + if (!CHAR_MATCHES(enc, ptr1, *ptr2)) 1.1693 + return 0; 1.1694 + } 1.1695 + return ptr1 == end1; 1.1696 +} 1.1697 + 1.1698 +static int PTRFASTCALL 1.1699 +PREFIX(nameLength)(const ENCODING *enc, const char *ptr) 1.1700 +{ 1.1701 + const char *start = ptr; 1.1702 + for (;;) { 1.1703 + switch (BYTE_TYPE(enc, ptr)) { 1.1704 +#define LEAD_CASE(n) \ 1.1705 + case BT_LEAD ## n: ptr += n; break; 1.1706 + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1.1707 +#undef LEAD_CASE 1.1708 + case BT_NONASCII: 1.1709 + case BT_NMSTRT: 1.1710 +#ifdef XML_NS 1.1711 + case BT_COLON: 1.1712 +#endif 1.1713 + case BT_HEX: 1.1714 + case BT_DIGIT: 1.1715 + case BT_NAME: 1.1716 + case BT_MINUS: 1.1717 + ptr += MINBPC(enc); 1.1718 + break; 1.1719 + default: 1.1720 + return (int)(ptr - start); 1.1721 + } 1.1722 + } 1.1723 +} 1.1724 + 1.1725 +static const char * PTRFASTCALL 1.1726 +PREFIX(skipS)(const ENCODING *enc, const char *ptr) 1.1727 +{ 1.1728 + for (;;) { 1.1729 + switch (BYTE_TYPE(enc, ptr)) { 1.1730 + case BT_LF: 1.1731 + case BT_CR: 1.1732 + case BT_S: 1.1733 + ptr += MINBPC(enc); 1.1734 + break; 1.1735 + default: 1.1736 + return ptr; 1.1737 + } 1.1738 + } 1.1739 +} 1.1740 + 1.1741 +static void PTRCALL 1.1742 +PREFIX(updatePosition)(const ENCODING *enc, 1.1743 + const char *ptr, 1.1744 + const char *end, 1.1745 + POSITION *pos) 1.1746 +{ 1.1747 + while (ptr != end) { 1.1748 + switch (BYTE_TYPE(enc, ptr)) { 1.1749 +#define LEAD_CASE(n) \ 1.1750 + case BT_LEAD ## n: \ 1.1751 + ptr += n; \ 1.1752 + break; 1.1753 + LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1.1754 +#undef LEAD_CASE 1.1755 + case BT_LF: 1.1756 + pos->columnNumber = (XML_Size)-1; 1.1757 + pos->lineNumber++; 1.1758 + ptr += MINBPC(enc); 1.1759 + break; 1.1760 + case BT_CR: 1.1761 + pos->lineNumber++; 1.1762 + ptr += MINBPC(enc); 1.1763 + if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) 1.1764 + ptr += MINBPC(enc); 1.1765 + pos->columnNumber = (XML_Size)-1; 1.1766 + break; 1.1767 + default: 1.1768 + ptr += MINBPC(enc); 1.1769 + break; 1.1770 + } 1.1771 + pos->columnNumber++; 1.1772 + } 1.1773 +} 1.1774 + 1.1775 +#undef DO_LEAD_CASE 1.1776 +#undef MULTIBYTE_CASES 1.1777 +#undef INVALID_CASES 1.1778 +#undef CHECK_NAME_CASE 1.1779 +#undef CHECK_NAME_CASES 1.1780 +#undef CHECK_NMSTRT_CASE 1.1781 +#undef CHECK_NMSTRT_CASES 1.1782 +