parser/expat/lib/xmltok_impl.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
michael@0 2 See the file COPYING for copying permission.
michael@0 3 */
michael@0 4
/* Fallback definition, used only when IS_INVALID_CHAR has not already been
   defined: it expands to 0, so the IS_INVALID_CHAR checks in this file
   never reject a character. */
michael@0 5 #ifndef IS_INVALID_CHAR
michael@0 6 #define IS_INVALID_CHAR(enc, ptr, n) (0)
michael@0 7 #endif
michael@0 8
/* Expands to the switch case for byte type BT_LEAD<n>, which is handled as
   a character occupying n bytes.  Returns XML_TOK_PARTIAL_CHAR when fewer
   than n bytes remain before end.  Stores ptr through nextTokPtr and
   returns XML_TOK_INVALID when IS_INVALID_CHAR is true for the character.
   Otherwise advances ptr by n and breaks out of the switch.  The expansion
   uses enc and end from the enclosing scope. */
michael@0 9 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
michael@0 10 case BT_LEAD ## n: \
michael@0 11 if (end - ptr < n) \
michael@0 12 return XML_TOK_PARTIAL_CHAR; \
michael@0 13 if (IS_INVALID_CHAR(enc, ptr, n)) { \
michael@0 14 *(nextTokPtr) = (ptr); \
michael@0 15 return XML_TOK_INVALID; \
michael@0 16 } \
michael@0 17 ptr += n; \
michael@0 18 break;
michael@0 19
/* Expands to the INVALID_LEAD_CASE cases for 2, 3 and 4 byte characters,
   followed by cases for BT_NONXML, BT_MALFORM and BT_TRAIL that store ptr
   through nextTokPtr and return XML_TOK_INVALID. */
michael@0 20 #define INVALID_CASES(ptr, nextTokPtr) \
michael@0 21 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
michael@0 22 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
michael@0 23 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
michael@0 24 case BT_NONXML: \
michael@0 25 case BT_MALFORM: \
michael@0 26 case BT_TRAIL: \
michael@0 27 *(nextTokPtr) = (ptr); \
michael@0 28 return XML_TOK_INVALID;
michael@0 29
/* Expands to the switch case for byte type BT_LEAD<n> inside a name.
   Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain before end.
   Stores ptr through nextTokPtr and returns XML_TOK_INVALID when
   IS_NAME_CHAR is false for the character.  Otherwise advances ptr by n
   and breaks out of the switch. */
michael@0 30 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
michael@0 31 case BT_LEAD ## n: \
michael@0 32 if (end - ptr < n) \
michael@0 33 return XML_TOK_PARTIAL_CHAR; \
michael@0 34 if (!IS_NAME_CHAR(enc, ptr, n)) { \
michael@0 35 *nextTokPtr = ptr; \
michael@0 36 return XML_TOK_INVALID; \
michael@0 37 } \
michael@0 38 ptr += n; \
michael@0 39 break;
michael@0 40
/* Expands to every switch case that accepts one name character.
   BT_NONASCII is first checked with IS_NAME_CHAR_MINBPC and returns
   XML_TOK_INVALID (storing ptr through nextTokPtr) on failure; on success
   it deliberately falls through into the shared group below.  The cases
   BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME and BT_MINUS advance ptr by
   MINBPC(enc) and break.  The 2, 3 and 4 byte lead cases are supplied by
   CHECK_NAME_CASE. */
michael@0 41 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
michael@0 42 case BT_NONASCII: \
michael@0 43 if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
michael@0 44 *nextTokPtr = ptr; \
michael@0 45 return XML_TOK_INVALID; \
michael@0 46 } \
michael@0 47 case BT_NMSTRT: \
michael@0 48 case BT_HEX: \
michael@0 49 case BT_DIGIT: \
michael@0 50 case BT_NAME: \
michael@0 51 case BT_MINUS: \
michael@0 52 ptr += MINBPC(enc); \
michael@0 53 break; \
michael@0 54 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
michael@0 55 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
michael@0 56 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
michael@0 57
/* Expands to the switch case for byte type BT_LEAD<n> at the start of a
   name.  Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain before
   end.  Stores ptr through nextTokPtr and returns XML_TOK_INVALID when
   IS_NMSTRT_CHAR is false for the character.  Otherwise advances ptr by n
   and breaks out of the switch. */
michael@0 58 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
michael@0 59 case BT_LEAD ## n: \
michael@0 60 if (end - ptr < n) \
michael@0 61 return XML_TOK_PARTIAL_CHAR; \
michael@0 62 if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
michael@0 63 *nextTokPtr = ptr; \
michael@0 64 return XML_TOK_INVALID; \
michael@0 65 } \
michael@0 66 ptr += n; \
michael@0 67 break;
michael@0 68
/* Expands to every switch case that accepts one name-start character.
   BT_NONASCII is first checked with IS_NMSTRT_CHAR_MINBPC and returns
   XML_TOK_INVALID (storing ptr through nextTokPtr) on failure; on success
   it deliberately falls through into the shared group below.  The cases
   BT_NMSTRT and BT_HEX advance ptr by MINBPC(enc) and break.  The 2, 3 and
   4 byte lead cases are supplied by CHECK_NMSTRT_CASE. */
michael@0 69 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
michael@0 70 case BT_NONASCII: \
michael@0 71 if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
michael@0 72 *nextTokPtr = ptr; \
michael@0 73 return XML_TOK_INVALID; \
michael@0 74 } \
michael@0 75 case BT_NMSTRT: \
michael@0 76 case BT_HEX: \
michael@0 77 ptr += MINBPC(enc); \
michael@0 78 break; \
michael@0 79 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
michael@0 80 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
michael@0 81 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
michael@0 82
/* Default name wrapper: when PREFIX has not already been defined, function
   names written as PREFIX(name) expand to the bare identifier. */
michael@0 83 #ifndef PREFIX
michael@0 84 #define PREFIX(ident) ident
michael@0 85 #endif
michael@0 86
michael@0 87 /* ptr points to character following "<!-" */
michael@0 88
michael@0 89 static int PTRCALL
michael@0 90 PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
michael@0 91 const char *end, const char **nextTokPtr)
michael@0 92 {
michael@0 93 if (ptr != end) {
michael@0 94 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
michael@0 95 *nextTokPtr = ptr;
michael@0 96 return XML_TOK_INVALID;
michael@0 97 }
michael@0 98 ptr += MINBPC(enc);
michael@0 99 while (ptr != end) {
michael@0 100 switch (BYTE_TYPE(enc, ptr)) {
michael@0 101 INVALID_CASES(ptr, nextTokPtr)
michael@0 102 case BT_MINUS:
michael@0 103 if ((ptr += MINBPC(enc)) == end)
michael@0 104 return XML_TOK_PARTIAL;
michael@0 105 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
michael@0 106 if ((ptr += MINBPC(enc)) == end)
michael@0 107 return XML_TOK_PARTIAL;
michael@0 108 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
michael@0 109 *nextTokPtr = ptr;
michael@0 110 return XML_TOK_INVALID;
michael@0 111 }
michael@0 112 *nextTokPtr = ptr + MINBPC(enc);
michael@0 113 return XML_TOK_COMMENT;
michael@0 114 }
michael@0 115 break;
michael@0 116 default:
michael@0 117 ptr += MINBPC(enc);
michael@0 118 break;
michael@0 119 }
michael@0 120 }
michael@0 121 }
michael@0 122 return XML_TOK_PARTIAL;
michael@0 123 }
michael@0 124
michael@0 125 /* ptr points to character following "<!" */
michael@0 126
michael@0 127 static int PTRCALL
michael@0 128 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
michael@0 129 const char *end, const char **nextTokPtr)
michael@0 130 {
michael@0 131 if (ptr == end)
michael@0 132 return XML_TOK_PARTIAL;
michael@0 133 switch (BYTE_TYPE(enc, ptr)) {
michael@0 134 case BT_MINUS:
michael@0 135 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 136 case BT_LSQB:
michael@0 137 *nextTokPtr = ptr + MINBPC(enc);
michael@0 138 return XML_TOK_COND_SECT_OPEN;
michael@0 139 case BT_NMSTRT:
michael@0 140 case BT_HEX:
michael@0 141 ptr += MINBPC(enc);
michael@0 142 break;
michael@0 143 default:
michael@0 144 *nextTokPtr = ptr;
michael@0 145 return XML_TOK_INVALID;
michael@0 146 }
michael@0 147 while (ptr != end) {
michael@0 148 switch (BYTE_TYPE(enc, ptr)) {
michael@0 149 case BT_PERCNT:
michael@0 150 if (ptr + MINBPC(enc) == end)
michael@0 151 return XML_TOK_PARTIAL;
michael@0 152 /* don't allow <!ENTITY% foo "whatever"> */
michael@0 153 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
michael@0 154 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
michael@0 155 *nextTokPtr = ptr;
michael@0 156 return XML_TOK_INVALID;
michael@0 157 }
michael@0 158 /* fall through */
michael@0 159 case BT_S: case BT_CR: case BT_LF:
michael@0 160 *nextTokPtr = ptr;
michael@0 161 return XML_TOK_DECL_OPEN;
michael@0 162 case BT_NMSTRT:
michael@0 163 case BT_HEX:
michael@0 164 ptr += MINBPC(enc);
michael@0 165 break;
michael@0 166 default:
michael@0 167 *nextTokPtr = ptr;
michael@0 168 return XML_TOK_INVALID;
michael@0 169 }
michael@0 170 }
michael@0 171 return XML_TOK_PARTIAL;
michael@0 172 }
michael@0 173
michael@0 174 static int PTRCALL
michael@0 175 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
michael@0 176 const char *end, int *tokPtr)
michael@0 177 {
michael@0 178 int upper = 0;
michael@0 179 *tokPtr = XML_TOK_PI;
michael@0 180 if (end - ptr != MINBPC(enc)*3)
michael@0 181 return 1;
michael@0 182 switch (BYTE_TO_ASCII(enc, ptr)) {
michael@0 183 case ASCII_x:
michael@0 184 break;
michael@0 185 case ASCII_X:
michael@0 186 upper = 1;
michael@0 187 break;
michael@0 188 default:
michael@0 189 return 1;
michael@0 190 }
michael@0 191 ptr += MINBPC(enc);
michael@0 192 switch (BYTE_TO_ASCII(enc, ptr)) {
michael@0 193 case ASCII_m:
michael@0 194 break;
michael@0 195 case ASCII_M:
michael@0 196 upper = 1;
michael@0 197 break;
michael@0 198 default:
michael@0 199 return 1;
michael@0 200 }
michael@0 201 ptr += MINBPC(enc);
michael@0 202 switch (BYTE_TO_ASCII(enc, ptr)) {
michael@0 203 case ASCII_l:
michael@0 204 break;
michael@0 205 case ASCII_L:
michael@0 206 upper = 1;
michael@0 207 break;
michael@0 208 default:
michael@0 209 return 1;
michael@0 210 }
michael@0 211 if (upper)
michael@0 212 return 0;
michael@0 213 *tokPtr = XML_TOK_XML_DECL;
michael@0 214 return 1;
michael@0 215 }
michael@0 216
michael@0 217 /* ptr points to character following "<?" */
michael@0 218
/* Scans a processing instruction (or XML declaration).
   The target must begin with a name-start character and continue with name
   characters.  When the target ends, checkPiTarget chooses the token stored
   in tok; a zero return from checkPiTarget makes this function return
   XML_TOK_INVALID with *nextTokPtr at the end of the target.
   On success the return value is tok and *nextTokPtr points just past the
   closing "?>".  XML_TOK_PARTIAL is returned when the input ends first. */
michael@0 219 static int PTRCALL
michael@0 220 PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
michael@0 221 const char *end, const char **nextTokPtr)
michael@0 222 {
michael@0 223 int tok;
/* start of the target name, kept for checkPiTarget */
michael@0 224 const char *target = ptr;
michael@0 225 if (ptr == end)
michael@0 226 return XML_TOK_PARTIAL;
michael@0 227 switch (BYTE_TYPE(enc, ptr)) {
michael@0 228 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
michael@0 229 default:
michael@0 230 *nextTokPtr = ptr;
michael@0 231 return XML_TOK_INVALID;
michael@0 232 }
michael@0 233 while (ptr != end) {
michael@0 234 switch (BYTE_TYPE(enc, ptr)) {
michael@0 235 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
/* whitespace ends the target; the rest is scanned up to "?>" */
michael@0 236 case BT_S: case BT_CR: case BT_LF:
michael@0 237 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
michael@0 238 *nextTokPtr = ptr;
michael@0 239 return XML_TOK_INVALID;
michael@0 240 }
michael@0 241 ptr += MINBPC(enc);
michael@0 242 while (ptr != end) {
michael@0 243 switch (BYTE_TYPE(enc, ptr)) {
michael@0 244 INVALID_CASES(ptr, nextTokPtr)
michael@0 245 case BT_QUEST:
michael@0 246 ptr += MINBPC(enc);
michael@0 247 if (ptr == end)
michael@0 248 return XML_TOK_PARTIAL;
michael@0 249 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
michael@0 250 *nextTokPtr = ptr + MINBPC(enc);
michael@0 251 return tok;
michael@0 252 }
michael@0 253 break;
michael@0 254 default:
michael@0 255 ptr += MINBPC(enc);
michael@0 256 break;
michael@0 257 }
michael@0 258 }
michael@0 259 return XML_TOK_PARTIAL;
/* a question mark directly after the target must be followed by ">" */
michael@0 260 case BT_QUEST:
michael@0 261 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
michael@0 262 *nextTokPtr = ptr;
michael@0 263 return XML_TOK_INVALID;
michael@0 264 }
michael@0 265 ptr += MINBPC(enc);
michael@0 266 if (ptr == end)
michael@0 267 return XML_TOK_PARTIAL;
michael@0 268 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
michael@0 269 *nextTokPtr = ptr + MINBPC(enc);
michael@0 270 return tok;
michael@0 271 }
michael@0 272 /* fall through */
michael@0 273 default:
michael@0 274 *nextTokPtr = ptr;
michael@0 275 return XML_TOK_INVALID;
michael@0 276 }
michael@0 277 }
michael@0 278 return XML_TOK_PARTIAL;
michael@0 279 }
michael@0 280
michael@0 281 static int PTRCALL
michael@0 282 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
michael@0 283 const char *end, const char **nextTokPtr)
michael@0 284 {
michael@0 285 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
michael@0 286 ASCII_T, ASCII_A, ASCII_LSQB };
michael@0 287 int i;
michael@0 288 /* CDATA[ */
michael@0 289 if (end - ptr < 6 * MINBPC(enc))
michael@0 290 return XML_TOK_PARTIAL;
michael@0 291 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
michael@0 292 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
michael@0 293 *nextTokPtr = ptr;
michael@0 294 return XML_TOK_INVALID;
michael@0 295 }
michael@0 296 }
michael@0 297 *nextTokPtr = ptr;
michael@0 298 return XML_TOK_CDATA_SECT_OPEN;
michael@0 299 }
michael@0 300
/* Returns the next token inside a CDATA section.
   XML_TOK_NONE: ptr == end.
   XML_TOK_PARTIAL: the input ends in the middle of a possible "]]>" or
   after a carriage return, or holds less than one whole code unit.
   XML_TOK_CDATA_SECT_CLOSE: "]]>" found; *nextTokPtr points past it.
   XML_TOK_DATA_NEWLINE: CR, CR LF or LF consumed.
   XML_TOK_DATA_CHARS: a run of ordinary characters; *nextTokPtr points at
   the first character that needs separate handling (or at end).
   Characters matched by INVALID_CASES at the very start yield
   XML_TOK_INVALID or XML_TOK_PARTIAL_CHAR. */
michael@0 301 static int PTRCALL
michael@0 302 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
michael@0 303 const char *end, const char **nextTokPtr)
michael@0 304 {
michael@0 305 if (ptr == end)
michael@0 306 return XML_TOK_NONE;
/* For multi-byte code units, trim end so the length is a whole number of
   units.  NOTE(review): the mask arithmetic presumably relies on
   MINBPC(enc) being a power of two -- confirm. */
michael@0 307 if (MINBPC(enc) > 1) {
michael@0 308 size_t n = end - ptr;
michael@0 309 if (n & (MINBPC(enc) - 1)) {
michael@0 310 n &= ~(MINBPC(enc) - 1);
michael@0 311 if (n == 0)
michael@0 312 return XML_TOK_PARTIAL;
michael@0 313 end = ptr + n;
michael@0 314 }
michael@0 315 }
michael@0 316 switch (BYTE_TYPE(enc, ptr)) {
michael@0 317 case BT_RSQB:
michael@0 318 ptr += MINBPC(enc);
michael@0 319 if (ptr == end)
michael@0 320 return XML_TOK_PARTIAL;
michael@0 321 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
michael@0 322 break;
michael@0 323 ptr += MINBPC(enc);
michael@0 324 if (ptr == end)
michael@0 325 return XML_TOK_PARTIAL;
michael@0 326 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
/* two brackets not followed by ">": step back to the second bracket so
   only the first one is reported as data */
michael@0 327 ptr -= MINBPC(enc);
michael@0 328 break;
michael@0 329 }
michael@0 330 *nextTokPtr = ptr + MINBPC(enc);
michael@0 331 return XML_TOK_CDATA_SECT_CLOSE;
michael@0 332 case BT_CR:
michael@0 333 ptr += MINBPC(enc);
michael@0 334 if (ptr == end)
michael@0 335 return XML_TOK_PARTIAL;
michael@0 336 if (BYTE_TYPE(enc, ptr) == BT_LF)
michael@0 337 ptr += MINBPC(enc);
michael@0 338 *nextTokPtr = ptr;
michael@0 339 return XML_TOK_DATA_NEWLINE;
michael@0 340 case BT_LF:
michael@0 341 *nextTokPtr = ptr + MINBPC(enc);
michael@0 342 return XML_TOK_DATA_NEWLINE;
michael@0 343 INVALID_CASES(ptr, nextTokPtr)
michael@0 344 default:
michael@0 345 ptr += MINBPC(enc);
michael@0 346 break;
michael@0 347 }
/* At least one character has been consumed; extend the run of data
   characters as far as possible. */
michael@0 348 while (ptr != end) {
michael@0 349 switch (BYTE_TYPE(enc, ptr)) {
/* Inside the run, a truncated or invalid multi-byte character ends the
   data token instead of reporting an error. */
michael@0 350 #define LEAD_CASE(n) \
michael@0 351 case BT_LEAD ## n: \
michael@0 352 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
michael@0 353 *nextTokPtr = ptr; \
michael@0 354 return XML_TOK_DATA_CHARS; \
michael@0 355 } \
michael@0 356 ptr += n; \
michael@0 357 break;
michael@0 358 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
michael@0 359 #undef LEAD_CASE
michael@0 360 case BT_NONXML:
michael@0 361 case BT_MALFORM:
michael@0 362 case BT_TRAIL:
michael@0 363 case BT_CR:
michael@0 364 case BT_LF:
michael@0 365 case BT_RSQB:
michael@0 366 *nextTokPtr = ptr;
michael@0 367 return XML_TOK_DATA_CHARS;
michael@0 368 default:
michael@0 369 ptr += MINBPC(enc);
michael@0 370 break;
michael@0 371 }
michael@0 372 }
michael@0 373 *nextTokPtr = ptr;
michael@0 374 return XML_TOK_DATA_CHARS;
michael@0 375 }
michael@0 376
michael@0 377 /* ptr points to character following "</" */
michael@0 378
michael@0 379 static int PTRCALL
michael@0 380 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
michael@0 381 const char *end, const char **nextTokPtr)
michael@0 382 {
michael@0 383 if (ptr == end)
michael@0 384 return XML_TOK_PARTIAL;
michael@0 385 switch (BYTE_TYPE(enc, ptr)) {
michael@0 386 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
michael@0 387 default:
michael@0 388 *nextTokPtr = ptr;
michael@0 389 return XML_TOK_INVALID;
michael@0 390 }
michael@0 391 while (ptr != end) {
michael@0 392 switch (BYTE_TYPE(enc, ptr)) {
michael@0 393 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
michael@0 394 case BT_S: case BT_CR: case BT_LF:
michael@0 395 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
michael@0 396 switch (BYTE_TYPE(enc, ptr)) {
michael@0 397 case BT_S: case BT_CR: case BT_LF:
michael@0 398 break;
michael@0 399 case BT_GT:
michael@0 400 *nextTokPtr = ptr + MINBPC(enc);
michael@0 401 return XML_TOK_END_TAG;
michael@0 402 default:
michael@0 403 *nextTokPtr = ptr;
michael@0 404 return XML_TOK_INVALID;
michael@0 405 }
michael@0 406 }
michael@0 407 return XML_TOK_PARTIAL;
michael@0 408 #ifdef XML_NS
michael@0 409 case BT_COLON:
michael@0 410 /* no need to check qname syntax here,
michael@0 411 since end-tag must match exactly */
michael@0 412 ptr += MINBPC(enc);
michael@0 413 break;
michael@0 414 #endif
michael@0 415 case BT_GT:
michael@0 416 *nextTokPtr = ptr + MINBPC(enc);
michael@0 417 return XML_TOK_END_TAG;
michael@0 418 default:
michael@0 419 *nextTokPtr = ptr;
michael@0 420 return XML_TOK_INVALID;
michael@0 421 }
michael@0 422 }
michael@0 423 return XML_TOK_PARTIAL;
michael@0 424 }
michael@0 425
michael@0 426 /* ptr points to character following "&#X" */
michael@0 427
michael@0 428 static int PTRCALL
michael@0 429 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
michael@0 430 const char *end, const char **nextTokPtr)
michael@0 431 {
michael@0 432 if (ptr != end) {
michael@0 433 switch (BYTE_TYPE(enc, ptr)) {
michael@0 434 case BT_DIGIT:
michael@0 435 case BT_HEX:
michael@0 436 break;
michael@0 437 default:
michael@0 438 *nextTokPtr = ptr;
michael@0 439 return XML_TOK_INVALID;
michael@0 440 }
michael@0 441 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
michael@0 442 switch (BYTE_TYPE(enc, ptr)) {
michael@0 443 case BT_DIGIT:
michael@0 444 case BT_HEX:
michael@0 445 break;
michael@0 446 case BT_SEMI:
michael@0 447 *nextTokPtr = ptr + MINBPC(enc);
michael@0 448 return XML_TOK_CHAR_REF;
michael@0 449 default:
michael@0 450 *nextTokPtr = ptr;
michael@0 451 return XML_TOK_INVALID;
michael@0 452 }
michael@0 453 }
michael@0 454 }
michael@0 455 return XML_TOK_PARTIAL;
michael@0 456 }
michael@0 457
michael@0 458 /* ptr points to character following "&#" */
michael@0 459
michael@0 460 static int PTRCALL
michael@0 461 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
michael@0 462 const char *end, const char **nextTokPtr)
michael@0 463 {
michael@0 464 if (ptr != end) {
michael@0 465 if (CHAR_MATCHES(enc, ptr, ASCII_x))
michael@0 466 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 467 switch (BYTE_TYPE(enc, ptr)) {
michael@0 468 case BT_DIGIT:
michael@0 469 break;
michael@0 470 default:
michael@0 471 *nextTokPtr = ptr;
michael@0 472 return XML_TOK_INVALID;
michael@0 473 }
michael@0 474 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
michael@0 475 switch (BYTE_TYPE(enc, ptr)) {
michael@0 476 case BT_DIGIT:
michael@0 477 break;
michael@0 478 case BT_SEMI:
michael@0 479 *nextTokPtr = ptr + MINBPC(enc);
michael@0 480 return XML_TOK_CHAR_REF;
michael@0 481 default:
michael@0 482 *nextTokPtr = ptr;
michael@0 483 return XML_TOK_INVALID;
michael@0 484 }
michael@0 485 }
michael@0 486 }
michael@0 487 return XML_TOK_PARTIAL;
michael@0 488 }
michael@0 489
michael@0 490 /* ptr points to character following "&" */
michael@0 491
michael@0 492 static int PTRCALL
michael@0 493 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
michael@0 494 const char **nextTokPtr)
michael@0 495 {
michael@0 496 if (ptr == end)
michael@0 497 return XML_TOK_PARTIAL;
michael@0 498 switch (BYTE_TYPE(enc, ptr)) {
michael@0 499 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
michael@0 500 case BT_NUM:
michael@0 501 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 502 default:
michael@0 503 *nextTokPtr = ptr;
michael@0 504 return XML_TOK_INVALID;
michael@0 505 }
michael@0 506 while (ptr != end) {
michael@0 507 switch (BYTE_TYPE(enc, ptr)) {
michael@0 508 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
michael@0 509 case BT_SEMI:
michael@0 510 *nextTokPtr = ptr + MINBPC(enc);
michael@0 511 return XML_TOK_ENTITY_REF;
michael@0 512 default:
michael@0 513 *nextTokPtr = ptr;
michael@0 514 return XML_TOK_INVALID;
michael@0 515 }
michael@0 516 }
michael@0 517 return XML_TOK_PARTIAL;
michael@0 518 }
michael@0 519
michael@0 520 /* ptr points to character following first character of attribute name */
michael@0 521
/* Scans the attributes of a start tag through to the end of the tag.
   Each attribute is a name, optional whitespace, an equals sign, optional
   whitespace and a quoted value.  Inside a value, a "<" or a character
   matched by INVALID_CASES is rejected and "&" is checked with scanRef.
   After a value only whitespace, "/" or ">" may follow.
   Returns XML_TOK_START_TAG_WITH_ATTS when the tag ends with ">" and
   XML_TOK_EMPTY_ELEMENT_WITH_ATTS when it ends with "/>", with *nextTokPtr
   just past the ">".  Returns XML_TOK_INVALID with *nextTokPtr at the
   offending character, or XML_TOK_PARTIAL when the input ends first.
   When XML_NS is defined, a second colon in one attribute name, or a colon
   not followed by a name-start character, is rejected. */
michael@0 522 static int PTRCALL
michael@0 523 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
michael@0 524 const char **nextTokPtr)
michael@0 525 {
michael@0 526 #ifdef XML_NS
/* set once the current attribute name has contained a colon */
michael@0 527 int hadColon = 0;
michael@0 528 #endif
michael@0 529 while (ptr != end) {
michael@0 530 switch (BYTE_TYPE(enc, ptr)) {
michael@0 531 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
michael@0 532 #ifdef XML_NS
michael@0 533 case BT_COLON:
michael@0 534 if (hadColon) {
michael@0 535 *nextTokPtr = ptr;
michael@0 536 return XML_TOK_INVALID;
michael@0 537 }
michael@0 538 hadColon = 1;
michael@0 539 ptr += MINBPC(enc);
michael@0 540 if (ptr == end)
michael@0 541 return XML_TOK_PARTIAL;
michael@0 542 switch (BYTE_TYPE(enc, ptr)) {
michael@0 543 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
michael@0 544 default:
michael@0 545 *nextTokPtr = ptr;
michael@0 546 return XML_TOK_INVALID;
michael@0 547 }
michael@0 548 break;
michael@0 549 #endif
/* whitespace after the name: skip it, then require an equals sign */
michael@0 550 case BT_S: case BT_CR: case BT_LF:
michael@0 551 for (;;) {
michael@0 552 int t;
michael@0 553
michael@0 554 ptr += MINBPC(enc);
michael@0 555 if (ptr == end)
michael@0 556 return XML_TOK_PARTIAL;
michael@0 557 t = BYTE_TYPE(enc, ptr);
michael@0 558 if (t == BT_EQUALS)
michael@0 559 break;
michael@0 560 switch (t) {
michael@0 561 case BT_S:
michael@0 562 case BT_LF:
michael@0 563 case BT_CR:
michael@0 564 break;
michael@0 565 default:
michael@0 566 *nextTokPtr = ptr;
michael@0 567 return XML_TOK_INVALID;
michael@0 568 }
michael@0 569 }
michael@0 570 /* fall through */
michael@0 571 case BT_EQUALS:
michael@0 572 {
/* byte type of the opening quote; the value ends at the same type */
michael@0 573 int open;
michael@0 574 #ifdef XML_NS
michael@0 575 hadColon = 0;
michael@0 576 #endif
/* skip whitespace between the equals sign and the opening quote */
michael@0 577 for (;;) {
michael@0 578 ptr += MINBPC(enc);
michael@0 579 if (ptr == end)
michael@0 580 return XML_TOK_PARTIAL;
michael@0 581 open = BYTE_TYPE(enc, ptr);
michael@0 582 if (open == BT_QUOT || open == BT_APOS)
michael@0 583 break;
michael@0 584 switch (open) {
michael@0 585 case BT_S:
michael@0 586 case BT_LF:
michael@0 587 case BT_CR:
michael@0 588 break;
michael@0 589 default:
michael@0 590 *nextTokPtr = ptr;
michael@0 591 return XML_TOK_INVALID;
michael@0 592 }
michael@0 593 }
michael@0 594 ptr += MINBPC(enc);
michael@0 595 /* in attribute value */
michael@0 596 for (;;) {
michael@0 597 int t;
michael@0 598 if (ptr == end)
michael@0 599 return XML_TOK_PARTIAL;
michael@0 600 t = BYTE_TYPE(enc, ptr);
michael@0 601 if (t == open)
michael@0 602 break;
michael@0 603 switch (t) {
michael@0 604 INVALID_CASES(ptr, nextTokPtr)
michael@0 605 case BT_AMP:
michael@0 606 {
/* scanRef writes the position after the reference straight into ptr */
michael@0 607 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
michael@0 608 if (tok <= 0) {
michael@0 609 if (tok == XML_TOK_INVALID)
michael@0 610 *nextTokPtr = ptr;
michael@0 611 return tok;
michael@0 612 }
michael@0 613 break;
michael@0 614 }
michael@0 615 case BT_LT:
michael@0 616 *nextTokPtr = ptr;
michael@0 617 return XML_TOK_INVALID;
michael@0 618 default:
michael@0 619 ptr += MINBPC(enc);
michael@0 620 break;
michael@0 621 }
michael@0 622 }
/* step past the closing quote; whitespace, a slash or ">" must follow */
michael@0 623 ptr += MINBPC(enc);
michael@0 624 if (ptr == end)
michael@0 625 return XML_TOK_PARTIAL;
michael@0 626 switch (BYTE_TYPE(enc, ptr)) {
michael@0 627 case BT_S:
michael@0 628 case BT_CR:
michael@0 629 case BT_LF:
michael@0 630 break;
michael@0 631 case BT_SOL:
michael@0 632 goto sol;
michael@0 633 case BT_GT:
michael@0 634 goto gt;
michael@0 635 default:
michael@0 636 *nextTokPtr = ptr;
michael@0 637 return XML_TOK_INVALID;
michael@0 638 }
michael@0 639 /* ptr points to closing quote */
michael@0 640 for (;;) {
michael@0 641 ptr += MINBPC(enc);
michael@0 642 if (ptr == end)
michael@0 643 return XML_TOK_PARTIAL;
michael@0 644 switch (BYTE_TYPE(enc, ptr)) {
/* a name-start character begins the next attribute name */
michael@0 645 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
michael@0 646 case BT_S: case BT_CR: case BT_LF:
michael@0 647 continue;
michael@0 648 case BT_GT:
michael@0 649 gt:
michael@0 650 *nextTokPtr = ptr + MINBPC(enc);
michael@0 651 return XML_TOK_START_TAG_WITH_ATTS;
michael@0 652 case BT_SOL:
michael@0 653 sol:
michael@0 654 ptr += MINBPC(enc);
michael@0 655 if (ptr == end)
michael@0 656 return XML_TOK_PARTIAL;
michael@0 657 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
michael@0 658 *nextTokPtr = ptr;
michael@0 659 return XML_TOK_INVALID;
michael@0 660 }
michael@0 661 *nextTokPtr = ptr + MINBPC(enc);
michael@0 662 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
michael@0 663 default:
michael@0 664 *nextTokPtr = ptr;
michael@0 665 return XML_TOK_INVALID;
michael@0 666 }
michael@0 667 break;
michael@0 668 }
michael@0 669 break;
michael@0 670 }
michael@0 671 default:
michael@0 672 *nextTokPtr = ptr;
michael@0 673 return XML_TOK_INVALID;
michael@0 674 }
michael@0 675 }
michael@0 676 return XML_TOK_PARTIAL;
michael@0 677 }
michael@0 678
michael@0 679 /* ptr points to character following "<" */
michael@0 680
/* Dispatches on the character after "<".
   "!" followed by a minus sign goes to scanComment and "!" followed by "["
   goes to scanCdataSection; any other character after "!" is invalid.
   "?" goes to scanPi and "/" goes to scanEndTag.
   A name-start character begins a start tag, whose name is scanned here.
   If whitespace after the name is followed by a name-start character, the
   rest of the tag is handed to scanAtts.
   Returns XML_TOK_START_TAG_NO_ATTS for a tag ending in ">" and
   XML_TOK_EMPTY_ELEMENT_NO_ATTS for one ending in "/>", with *nextTokPtr
   just past the ">".  Returns XML_TOK_INVALID with *nextTokPtr at the
   offending character, or XML_TOK_PARTIAL when the input ends first. */
michael@0 681 static int PTRCALL
michael@0 682 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
michael@0 683 const char **nextTokPtr)
michael@0 684 {
michael@0 685 #ifdef XML_NS
michael@0 686 int hadColon;
michael@0 687 #endif
michael@0 688 if (ptr == end)
michael@0 689 return XML_TOK_PARTIAL;
michael@0 690 switch (BYTE_TYPE(enc, ptr)) {
michael@0 691 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
michael@0 692 case BT_EXCL:
michael@0 693 if ((ptr += MINBPC(enc)) == end)
michael@0 694 return XML_TOK_PARTIAL;
michael@0 695 switch (BYTE_TYPE(enc, ptr)) {
michael@0 696 case BT_MINUS:
michael@0 697 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 698 case BT_LSQB:
michael@0 699 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
michael@0 700 end, nextTokPtr);
michael@0 701 }
michael@0 702 *nextTokPtr = ptr;
michael@0 703 return XML_TOK_INVALID;
michael@0 704 case BT_QUEST:
michael@0 705 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 706 case BT_SOL:
michael@0 707 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 708 default:
michael@0 709 *nextTokPtr = ptr;
michael@0 710 return XML_TOK_INVALID;
michael@0 711 }
michael@0 712 #ifdef XML_NS
michael@0 713 hadColon = 0;
michael@0 714 #endif
michael@0 715 /* we have a start-tag */
michael@0 716 while (ptr != end) {
michael@0 717 switch (BYTE_TYPE(enc, ptr)) {
michael@0 718 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
michael@0 719 #ifdef XML_NS
/* at most one colon per element name, and a name-start character must
   follow it */
michael@0 720 case BT_COLON:
michael@0 721 if (hadColon) {
michael@0 722 *nextTokPtr = ptr;
michael@0 723 return XML_TOK_INVALID;
michael@0 724 }
michael@0 725 hadColon = 1;
michael@0 726 ptr += MINBPC(enc);
michael@0 727 if (ptr == end)
michael@0 728 return XML_TOK_PARTIAL;
michael@0 729 switch (BYTE_TYPE(enc, ptr)) {
michael@0 730 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
michael@0 731 default:
michael@0 732 *nextTokPtr = ptr;
michael@0 733 return XML_TOK_INVALID;
michael@0 734 }
michael@0 735 break;
michael@0 736 #endif
/* whitespace ends the element name; look for attributes or the tag end */
michael@0 737 case BT_S: case BT_CR: case BT_LF:
michael@0 738 {
michael@0 739 ptr += MINBPC(enc);
michael@0 740 while (ptr != end) {
michael@0 741 switch (BYTE_TYPE(enc, ptr)) {
michael@0 742 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
michael@0 743 case BT_GT:
michael@0 744 goto gt;
michael@0 745 case BT_SOL:
michael@0 746 goto sol;
michael@0 747 case BT_S: case BT_CR: case BT_LF:
michael@0 748 ptr += MINBPC(enc);
michael@0 749 continue;
michael@0 750 default:
michael@0 751 *nextTokPtr = ptr;
michael@0 752 return XML_TOK_INVALID;
michael@0 753 }
/* reached only after CHECK_NMSTRT_CASES consumed the first character of
   an attribute name */
michael@0 754 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
michael@0 755 }
michael@0 756 return XML_TOK_PARTIAL;
michael@0 757 }
michael@0 758 case BT_GT:
michael@0 759 gt:
michael@0 760 *nextTokPtr = ptr + MINBPC(enc);
michael@0 761 return XML_TOK_START_TAG_NO_ATTS;
michael@0 762 case BT_SOL:
michael@0 763 sol:
michael@0 764 ptr += MINBPC(enc);
michael@0 765 if (ptr == end)
michael@0 766 return XML_TOK_PARTIAL;
michael@0 767 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
michael@0 768 *nextTokPtr = ptr;
michael@0 769 return XML_TOK_INVALID;
michael@0 770 }
michael@0 771 *nextTokPtr = ptr + MINBPC(enc);
michael@0 772 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
michael@0 773 default:
michael@0 774 *nextTokPtr = ptr;
michael@0 775 return XML_TOK_INVALID;
michael@0 776 }
michael@0 777 }
michael@0 778 return XML_TOK_PARTIAL;
michael@0 779 }
michael@0 780
/* Returns the next token in element content.
   XML_TOK_NONE: ptr == end.
   XML_TOK_PARTIAL: less than one whole code unit is available.
   "<" is delegated to scanLt and "&" to scanRef.
   XML_TOK_DATA_NEWLINE: CR, CR LF or LF consumed.
   XML_TOK_TRAILING_CR: the input ends right after a carriage return.
   XML_TOK_TRAILING_RSQB: the input ends after one or two "]" characters.
   XML_TOK_INVALID: the sequence "]]>" was found, or INVALID_CASES rejected
   the first character.
   XML_TOK_DATA_CHARS: a run of ordinary characters; *nextTokPtr points at
   the first character that needs separate handling (or at end). */
michael@0 781 static int PTRCALL
michael@0 782 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
michael@0 783 const char **nextTokPtr)
michael@0 784 {
michael@0 785 if (ptr == end)
michael@0 786 return XML_TOK_NONE;
/* For multi-byte code units, trim end so the length is a whole number of
   units.  NOTE(review): the mask arithmetic presumably relies on
   MINBPC(enc) being a power of two -- confirm. */
michael@0 787 if (MINBPC(enc) > 1) {
michael@0 788 size_t n = end - ptr;
michael@0 789 if (n & (MINBPC(enc) - 1)) {
michael@0 790 n &= ~(MINBPC(enc) - 1);
michael@0 791 if (n == 0)
michael@0 792 return XML_TOK_PARTIAL;
michael@0 793 end = ptr + n;
michael@0 794 }
michael@0 795 }
michael@0 796 switch (BYTE_TYPE(enc, ptr)) {
michael@0 797 case BT_LT:
michael@0 798 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 799 case BT_AMP:
michael@0 800 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 801 case BT_CR:
michael@0 802 ptr += MINBPC(enc);
michael@0 803 if (ptr == end)
michael@0 804 return XML_TOK_TRAILING_CR;
michael@0 805 if (BYTE_TYPE(enc, ptr) == BT_LF)
michael@0 806 ptr += MINBPC(enc);
michael@0 807 *nextTokPtr = ptr;
michael@0 808 return XML_TOK_DATA_NEWLINE;
michael@0 809 case BT_LF:
michael@0 810 *nextTokPtr = ptr + MINBPC(enc);
michael@0 811 return XML_TOK_DATA_NEWLINE;
michael@0 812 case BT_RSQB:
michael@0 813 ptr += MINBPC(enc);
michael@0 814 if (ptr == end)
michael@0 815 return XML_TOK_TRAILING_RSQB;
michael@0 816 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
michael@0 817 break;
michael@0 818 ptr += MINBPC(enc);
michael@0 819 if (ptr == end)
michael@0 820 return XML_TOK_TRAILING_RSQB;
michael@0 821 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
/* two brackets not followed by ">": step back to the second bracket so
   the data run below starts there */
michael@0 822 ptr -= MINBPC(enc);
michael@0 823 break;
michael@0 824 }
/* "]]>" in content is reported as invalid, positioned at the ">" */
michael@0 825 *nextTokPtr = ptr;
michael@0 826 return XML_TOK_INVALID;
michael@0 827 INVALID_CASES(ptr, nextTokPtr)
michael@0 828 default:
michael@0 829 ptr += MINBPC(enc);
michael@0 830 break;
michael@0 831 }
/* At least one character has been consumed; extend the run of data
   characters as far as possible. */
michael@0 832 while (ptr != end) {
michael@0 833 switch (BYTE_TYPE(enc, ptr)) {
/* Inside the run, a truncated or invalid multi-byte character ends the
   data token instead of reporting an error. */
michael@0 834 #define LEAD_CASE(n) \
michael@0 835 case BT_LEAD ## n: \
michael@0 836 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
michael@0 837 *nextTokPtr = ptr; \
michael@0 838 return XML_TOK_DATA_CHARS; \
michael@0 839 } \
michael@0 840 ptr += n; \
michael@0 841 break;
michael@0 842 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
michael@0 843 #undef LEAD_CASE
/* Look ahead without consuming: a "]" that cannot start "]]>" is ordinary
   data.  A complete "]]>" is invalid.  When the input ends before that can
   be decided, the run stops in front of the "]". */
michael@0 844 case BT_RSQB:
michael@0 845 if (ptr + MINBPC(enc) != end) {
michael@0 846 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
michael@0 847 ptr += MINBPC(enc);
michael@0 848 break;
michael@0 849 }
michael@0 850 if (ptr + 2*MINBPC(enc) != end) {
michael@0 851 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
michael@0 852 ptr += MINBPC(enc);
michael@0 853 break;
michael@0 854 }
michael@0 855 *nextTokPtr = ptr + 2*MINBPC(enc);
michael@0 856 return XML_TOK_INVALID;
michael@0 857 }
michael@0 858 }
michael@0 859 /* fall through */
michael@0 860 case BT_AMP:
michael@0 861 case BT_LT:
michael@0 862 case BT_NONXML:
michael@0 863 case BT_MALFORM:
michael@0 864 case BT_TRAIL:
michael@0 865 case BT_CR:
michael@0 866 case BT_LF:
michael@0 867 *nextTokPtr = ptr;
michael@0 868 return XML_TOK_DATA_CHARS;
michael@0 869 default:
michael@0 870 ptr += MINBPC(enc);
michael@0 871 break;
michael@0 872 }
michael@0 873 }
michael@0 874 *nextTokPtr = ptr;
michael@0 875 return XML_TOK_DATA_CHARS;
michael@0 876 }
michael@0 877
michael@0 878 /* ptr points to character following "%" */
michael@0 879
michael@0 880 static int PTRCALL
michael@0 881 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
michael@0 882 const char **nextTokPtr)
michael@0 883 {
michael@0 884 if (ptr == end)
michael@0 885 return -XML_TOK_PERCENT;
michael@0 886 switch (BYTE_TYPE(enc, ptr)) {
michael@0 887 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
michael@0 888 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
michael@0 889 *nextTokPtr = ptr;
michael@0 890 return XML_TOK_PERCENT;
michael@0 891 default:
michael@0 892 *nextTokPtr = ptr;
michael@0 893 return XML_TOK_INVALID;
michael@0 894 }
michael@0 895 while (ptr != end) {
michael@0 896 switch (BYTE_TYPE(enc, ptr)) {
michael@0 897 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
michael@0 898 case BT_SEMI:
michael@0 899 *nextTokPtr = ptr + MINBPC(enc);
michael@0 900 return XML_TOK_PARAM_ENTITY_REF;
michael@0 901 default:
michael@0 902 *nextTokPtr = ptr;
michael@0 903 return XML_TOK_INVALID;
michael@0 904 }
michael@0 905 }
michael@0 906 return XML_TOK_PARTIAL;
michael@0 907 }
michael@0 908
michael@0 909 static int PTRCALL
michael@0 910 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
michael@0 911 const char **nextTokPtr)
michael@0 912 {
michael@0 913 if (ptr == end)
michael@0 914 return XML_TOK_PARTIAL;
michael@0 915 switch (BYTE_TYPE(enc, ptr)) {
michael@0 916 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
michael@0 917 default:
michael@0 918 *nextTokPtr = ptr;
michael@0 919 return XML_TOK_INVALID;
michael@0 920 }
michael@0 921 while (ptr != end) {
michael@0 922 switch (BYTE_TYPE(enc, ptr)) {
michael@0 923 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
michael@0 924 case BT_CR: case BT_LF: case BT_S:
michael@0 925 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
michael@0 926 *nextTokPtr = ptr;
michael@0 927 return XML_TOK_POUND_NAME;
michael@0 928 default:
michael@0 929 *nextTokPtr = ptr;
michael@0 930 return XML_TOK_INVALID;
michael@0 931 }
michael@0 932 }
michael@0 933 return -XML_TOK_POUND_NAME;
michael@0 934 }
michael@0 935
michael@0 936 static int PTRCALL
michael@0 937 PREFIX(scanLit)(int open, const ENCODING *enc,
michael@0 938 const char *ptr, const char *end,
michael@0 939 const char **nextTokPtr)
michael@0 940 {
michael@0 941 while (ptr != end) {
michael@0 942 int t = BYTE_TYPE(enc, ptr);
michael@0 943 switch (t) {
michael@0 944 INVALID_CASES(ptr, nextTokPtr)
michael@0 945 case BT_QUOT:
michael@0 946 case BT_APOS:
michael@0 947 ptr += MINBPC(enc);
michael@0 948 if (t != open)
michael@0 949 break;
michael@0 950 if (ptr == end)
michael@0 951 return -XML_TOK_LITERAL;
michael@0 952 *nextTokPtr = ptr;
michael@0 953 switch (BYTE_TYPE(enc, ptr)) {
michael@0 954 case BT_S: case BT_CR: case BT_LF:
michael@0 955 case BT_GT: case BT_PERCNT: case BT_LSQB:
michael@0 956 return XML_TOK_LITERAL;
michael@0 957 default:
michael@0 958 return XML_TOK_INVALID;
michael@0 959 }
michael@0 960 default:
michael@0 961 ptr += MINBPC(enc);
michael@0 962 break;
michael@0 963 }
michael@0 964 }
michael@0 965 return XML_TOK_PARTIAL;
michael@0 966 }
michael@0 967
michael@0 968 static int PTRCALL
michael@0 969 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
michael@0 970 const char **nextTokPtr)
michael@0 971 {
michael@0 972 int tok;
michael@0 973 if (ptr == end)
michael@0 974 return XML_TOK_NONE;
michael@0 975 if (MINBPC(enc) > 1) {
michael@0 976 size_t n = end - ptr;
michael@0 977 if (n & (MINBPC(enc) - 1)) {
michael@0 978 n &= ~(MINBPC(enc) - 1);
michael@0 979 if (n == 0)
michael@0 980 return XML_TOK_PARTIAL;
michael@0 981 end = ptr + n;
michael@0 982 }
michael@0 983 }
michael@0 984 switch (BYTE_TYPE(enc, ptr)) {
michael@0 985 case BT_QUOT:
michael@0 986 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 987 case BT_APOS:
michael@0 988 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 989 case BT_LT:
michael@0 990 {
michael@0 991 ptr += MINBPC(enc);
michael@0 992 if (ptr == end)
michael@0 993 return XML_TOK_PARTIAL;
michael@0 994 switch (BYTE_TYPE(enc, ptr)) {
michael@0 995 case BT_EXCL:
michael@0 996 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 997 case BT_QUEST:
michael@0 998 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 999 case BT_NMSTRT:
michael@0 1000 case BT_HEX:
michael@0 1001 case BT_NONASCII:
michael@0 1002 case BT_LEAD2:
michael@0 1003 case BT_LEAD3:
michael@0 1004 case BT_LEAD4:
michael@0 1005 *nextTokPtr = ptr - MINBPC(enc);
michael@0 1006 return XML_TOK_INSTANCE_START;
michael@0 1007 }
michael@0 1008 *nextTokPtr = ptr;
michael@0 1009 return XML_TOK_INVALID;
michael@0 1010 }
michael@0 1011 case BT_CR:
michael@0 1012 if (ptr + MINBPC(enc) == end) {
michael@0 1013 *nextTokPtr = end;
michael@0 1014 /* indicate that this might be part of a CR/LF pair */
michael@0 1015 return -XML_TOK_PROLOG_S;
michael@0 1016 }
michael@0 1017 /* fall through */
michael@0 1018 case BT_S: case BT_LF:
michael@0 1019 for (;;) {
michael@0 1020 ptr += MINBPC(enc);
michael@0 1021 if (ptr == end)
michael@0 1022 break;
michael@0 1023 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1024 case BT_S: case BT_LF:
michael@0 1025 break;
michael@0 1026 case BT_CR:
michael@0 1027 /* don't split CR/LF pair */
michael@0 1028 if (ptr + MINBPC(enc) != end)
michael@0 1029 break;
michael@0 1030 /* fall through */
michael@0 1031 default:
michael@0 1032 *nextTokPtr = ptr;
michael@0 1033 return XML_TOK_PROLOG_S;
michael@0 1034 }
michael@0 1035 }
michael@0 1036 *nextTokPtr = ptr;
michael@0 1037 return XML_TOK_PROLOG_S;
michael@0 1038 case BT_PERCNT:
michael@0 1039 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 1040 case BT_COMMA:
michael@0 1041 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1042 return XML_TOK_COMMA;
michael@0 1043 case BT_LSQB:
michael@0 1044 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1045 return XML_TOK_OPEN_BRACKET;
michael@0 1046 case BT_RSQB:
michael@0 1047 ptr += MINBPC(enc);
michael@0 1048 if (ptr == end)
michael@0 1049 return -XML_TOK_CLOSE_BRACKET;
michael@0 1050 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
michael@0 1051 if (ptr + MINBPC(enc) == end)
michael@0 1052 return XML_TOK_PARTIAL;
michael@0 1053 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
michael@0 1054 *nextTokPtr = ptr + 2*MINBPC(enc);
michael@0 1055 return XML_TOK_COND_SECT_CLOSE;
michael@0 1056 }
michael@0 1057 }
michael@0 1058 *nextTokPtr = ptr;
michael@0 1059 return XML_TOK_CLOSE_BRACKET;
michael@0 1060 case BT_LPAR:
michael@0 1061 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1062 return XML_TOK_OPEN_PAREN;
michael@0 1063 case BT_RPAR:
michael@0 1064 ptr += MINBPC(enc);
michael@0 1065 if (ptr == end)
michael@0 1066 return -XML_TOK_CLOSE_PAREN;
michael@0 1067 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1068 case BT_AST:
michael@0 1069 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1070 return XML_TOK_CLOSE_PAREN_ASTERISK;
michael@0 1071 case BT_QUEST:
michael@0 1072 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1073 return XML_TOK_CLOSE_PAREN_QUESTION;
michael@0 1074 case BT_PLUS:
michael@0 1075 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1076 return XML_TOK_CLOSE_PAREN_PLUS;
michael@0 1077 case BT_CR: case BT_LF: case BT_S:
michael@0 1078 case BT_GT: case BT_COMMA: case BT_VERBAR:
michael@0 1079 case BT_RPAR:
michael@0 1080 *nextTokPtr = ptr;
michael@0 1081 return XML_TOK_CLOSE_PAREN;
michael@0 1082 }
michael@0 1083 *nextTokPtr = ptr;
michael@0 1084 return XML_TOK_INVALID;
michael@0 1085 case BT_VERBAR:
michael@0 1086 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1087 return XML_TOK_OR;
michael@0 1088 case BT_GT:
michael@0 1089 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1090 return XML_TOK_DECL_CLOSE;
michael@0 1091 case BT_NUM:
michael@0 1092 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 1093 #define LEAD_CASE(n) \
michael@0 1094 case BT_LEAD ## n: \
michael@0 1095 if (end - ptr < n) \
michael@0 1096 return XML_TOK_PARTIAL_CHAR; \
michael@0 1097 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
michael@0 1098 ptr += n; \
michael@0 1099 tok = XML_TOK_NAME; \
michael@0 1100 break; \
michael@0 1101 } \
michael@0 1102 if (IS_NAME_CHAR(enc, ptr, n)) { \
michael@0 1103 ptr += n; \
michael@0 1104 tok = XML_TOK_NMTOKEN; \
michael@0 1105 break; \
michael@0 1106 } \
michael@0 1107 *nextTokPtr = ptr; \
michael@0 1108 return XML_TOK_INVALID;
michael@0 1109 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
michael@0 1110 #undef LEAD_CASE
michael@0 1111 case BT_NMSTRT:
michael@0 1112 case BT_HEX:
michael@0 1113 tok = XML_TOK_NAME;
michael@0 1114 ptr += MINBPC(enc);
michael@0 1115 break;
michael@0 1116 case BT_DIGIT:
michael@0 1117 case BT_NAME:
michael@0 1118 case BT_MINUS:
michael@0 1119 #ifdef XML_NS
michael@0 1120 case BT_COLON:
michael@0 1121 #endif
michael@0 1122 tok = XML_TOK_NMTOKEN;
michael@0 1123 ptr += MINBPC(enc);
michael@0 1124 break;
michael@0 1125 case BT_NONASCII:
michael@0 1126 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
michael@0 1127 ptr += MINBPC(enc);
michael@0 1128 tok = XML_TOK_NAME;
michael@0 1129 break;
michael@0 1130 }
michael@0 1131 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
michael@0 1132 ptr += MINBPC(enc);
michael@0 1133 tok = XML_TOK_NMTOKEN;
michael@0 1134 break;
michael@0 1135 }
michael@0 1136 /* fall through */
michael@0 1137 default:
michael@0 1138 *nextTokPtr = ptr;
michael@0 1139 return XML_TOK_INVALID;
michael@0 1140 }
michael@0 1141 while (ptr != end) {
michael@0 1142 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1143 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
michael@0 1144 case BT_GT: case BT_RPAR: case BT_COMMA:
michael@0 1145 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
michael@0 1146 case BT_S: case BT_CR: case BT_LF:
michael@0 1147 *nextTokPtr = ptr;
michael@0 1148 return tok;
michael@0 1149 #ifdef XML_NS
michael@0 1150 case BT_COLON:
michael@0 1151 ptr += MINBPC(enc);
michael@0 1152 switch (tok) {
michael@0 1153 case XML_TOK_NAME:
michael@0 1154 if (ptr == end)
michael@0 1155 return XML_TOK_PARTIAL;
michael@0 1156 tok = XML_TOK_PREFIXED_NAME;
michael@0 1157 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1158 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
michael@0 1159 default:
michael@0 1160 tok = XML_TOK_NMTOKEN;
michael@0 1161 break;
michael@0 1162 }
michael@0 1163 break;
michael@0 1164 case XML_TOK_PREFIXED_NAME:
michael@0 1165 tok = XML_TOK_NMTOKEN;
michael@0 1166 break;
michael@0 1167 }
michael@0 1168 break;
michael@0 1169 #endif
michael@0 1170 case BT_PLUS:
michael@0 1171 if (tok == XML_TOK_NMTOKEN) {
michael@0 1172 *nextTokPtr = ptr;
michael@0 1173 return XML_TOK_INVALID;
michael@0 1174 }
michael@0 1175 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1176 return XML_TOK_NAME_PLUS;
michael@0 1177 case BT_AST:
michael@0 1178 if (tok == XML_TOK_NMTOKEN) {
michael@0 1179 *nextTokPtr = ptr;
michael@0 1180 return XML_TOK_INVALID;
michael@0 1181 }
michael@0 1182 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1183 return XML_TOK_NAME_ASTERISK;
michael@0 1184 case BT_QUEST:
michael@0 1185 if (tok == XML_TOK_NMTOKEN) {
michael@0 1186 *nextTokPtr = ptr;
michael@0 1187 return XML_TOK_INVALID;
michael@0 1188 }
michael@0 1189 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1190 return XML_TOK_NAME_QUESTION;
michael@0 1191 default:
michael@0 1192 *nextTokPtr = ptr;
michael@0 1193 return XML_TOK_INVALID;
michael@0 1194 }
michael@0 1195 }
michael@0 1196 return -tok;
michael@0 1197 }
michael@0 1198
michael@0 1199 static int PTRCALL
michael@0 1200 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
michael@0 1201 const char *end, const char **nextTokPtr)
michael@0 1202 {
michael@0 1203 const char *start;
michael@0 1204 if (ptr == end)
michael@0 1205 return XML_TOK_NONE;
michael@0 1206 start = ptr;
michael@0 1207 while (ptr != end) {
michael@0 1208 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1209 #define LEAD_CASE(n) \
michael@0 1210 case BT_LEAD ## n: ptr += n; break;
michael@0 1211 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
michael@0 1212 #undef LEAD_CASE
michael@0 1213 case BT_AMP:
michael@0 1214 if (ptr == start)
michael@0 1215 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 1216 *nextTokPtr = ptr;
michael@0 1217 return XML_TOK_DATA_CHARS;
michael@0 1218 case BT_LT:
michael@0 1219 /* this is for inside entity references */
michael@0 1220 *nextTokPtr = ptr;
michael@0 1221 return XML_TOK_INVALID;
michael@0 1222 case BT_LF:
michael@0 1223 if (ptr == start) {
michael@0 1224 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1225 return XML_TOK_DATA_NEWLINE;
michael@0 1226 }
michael@0 1227 *nextTokPtr = ptr;
michael@0 1228 return XML_TOK_DATA_CHARS;
michael@0 1229 case BT_CR:
michael@0 1230 if (ptr == start) {
michael@0 1231 ptr += MINBPC(enc);
michael@0 1232 if (ptr == end)
michael@0 1233 return XML_TOK_TRAILING_CR;
michael@0 1234 if (BYTE_TYPE(enc, ptr) == BT_LF)
michael@0 1235 ptr += MINBPC(enc);
michael@0 1236 *nextTokPtr = ptr;
michael@0 1237 return XML_TOK_DATA_NEWLINE;
michael@0 1238 }
michael@0 1239 *nextTokPtr = ptr;
michael@0 1240 return XML_TOK_DATA_CHARS;
michael@0 1241 case BT_S:
michael@0 1242 if (ptr == start) {
michael@0 1243 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1244 return XML_TOK_ATTRIBUTE_VALUE_S;
michael@0 1245 }
michael@0 1246 *nextTokPtr = ptr;
michael@0 1247 return XML_TOK_DATA_CHARS;
michael@0 1248 default:
michael@0 1249 ptr += MINBPC(enc);
michael@0 1250 break;
michael@0 1251 }
michael@0 1252 }
michael@0 1253 *nextTokPtr = ptr;
michael@0 1254 return XML_TOK_DATA_CHARS;
michael@0 1255 }
michael@0 1256
michael@0 1257 static int PTRCALL
michael@0 1258 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
michael@0 1259 const char *end, const char **nextTokPtr)
michael@0 1260 {
michael@0 1261 const char *start;
michael@0 1262 if (ptr == end)
michael@0 1263 return XML_TOK_NONE;
michael@0 1264 start = ptr;
michael@0 1265 while (ptr != end) {
michael@0 1266 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1267 #define LEAD_CASE(n) \
michael@0 1268 case BT_LEAD ## n: ptr += n; break;
michael@0 1269 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
michael@0 1270 #undef LEAD_CASE
michael@0 1271 case BT_AMP:
michael@0 1272 if (ptr == start)
michael@0 1273 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
michael@0 1274 *nextTokPtr = ptr;
michael@0 1275 return XML_TOK_DATA_CHARS;
michael@0 1276 case BT_PERCNT:
michael@0 1277 if (ptr == start) {
michael@0 1278 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
michael@0 1279 end, nextTokPtr);
michael@0 1280 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
michael@0 1281 }
michael@0 1282 *nextTokPtr = ptr;
michael@0 1283 return XML_TOK_DATA_CHARS;
michael@0 1284 case BT_LF:
michael@0 1285 if (ptr == start) {
michael@0 1286 *nextTokPtr = ptr + MINBPC(enc);
michael@0 1287 return XML_TOK_DATA_NEWLINE;
michael@0 1288 }
michael@0 1289 *nextTokPtr = ptr;
michael@0 1290 return XML_TOK_DATA_CHARS;
michael@0 1291 case BT_CR:
michael@0 1292 if (ptr == start) {
michael@0 1293 ptr += MINBPC(enc);
michael@0 1294 if (ptr == end)
michael@0 1295 return XML_TOK_TRAILING_CR;
michael@0 1296 if (BYTE_TYPE(enc, ptr) == BT_LF)
michael@0 1297 ptr += MINBPC(enc);
michael@0 1298 *nextTokPtr = ptr;
michael@0 1299 return XML_TOK_DATA_NEWLINE;
michael@0 1300 }
michael@0 1301 *nextTokPtr = ptr;
michael@0 1302 return XML_TOK_DATA_CHARS;
michael@0 1303 default:
michael@0 1304 ptr += MINBPC(enc);
michael@0 1305 break;
michael@0 1306 }
michael@0 1307 }
michael@0 1308 *nextTokPtr = ptr;
michael@0 1309 return XML_TOK_DATA_CHARS;
michael@0 1310 }
michael@0 1311
michael@0 1312 #ifdef XML_DTD
michael@0 1313
michael@0 1314 static int PTRCALL
michael@0 1315 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
michael@0 1316 const char *end, const char **nextTokPtr)
michael@0 1317 {
michael@0 1318 int level = 0;
michael@0 1319 if (MINBPC(enc) > 1) {
michael@0 1320 size_t n = end - ptr;
michael@0 1321 if (n & (MINBPC(enc) - 1)) {
michael@0 1322 n &= ~(MINBPC(enc) - 1);
michael@0 1323 end = ptr + n;
michael@0 1324 }
michael@0 1325 }
michael@0 1326 while (ptr != end) {
michael@0 1327 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1328 INVALID_CASES(ptr, nextTokPtr)
michael@0 1329 case BT_LT:
michael@0 1330 if ((ptr += MINBPC(enc)) == end)
michael@0 1331 return XML_TOK_PARTIAL;
michael@0 1332 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
michael@0 1333 if ((ptr += MINBPC(enc)) == end)
michael@0 1334 return XML_TOK_PARTIAL;
michael@0 1335 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
michael@0 1336 ++level;
michael@0 1337 ptr += MINBPC(enc);
michael@0 1338 }
michael@0 1339 }
michael@0 1340 break;
michael@0 1341 case BT_RSQB:
michael@0 1342 if ((ptr += MINBPC(enc)) == end)
michael@0 1343 return XML_TOK_PARTIAL;
michael@0 1344 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
michael@0 1345 if ((ptr += MINBPC(enc)) == end)
michael@0 1346 return XML_TOK_PARTIAL;
michael@0 1347 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
michael@0 1348 ptr += MINBPC(enc);
michael@0 1349 if (level == 0) {
michael@0 1350 *nextTokPtr = ptr;
michael@0 1351 return XML_TOK_IGNORE_SECT;
michael@0 1352 }
michael@0 1353 --level;
michael@0 1354 }
michael@0 1355 }
michael@0 1356 break;
michael@0 1357 default:
michael@0 1358 ptr += MINBPC(enc);
michael@0 1359 break;
michael@0 1360 }
michael@0 1361 }
michael@0 1362 return XML_TOK_PARTIAL;
michael@0 1363 }
michael@0 1364
michael@0 1365 #endif /* XML_DTD */
michael@0 1366
michael@0 1367 static int PTRCALL
michael@0 1368 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
michael@0 1369 const char **badPtr)
michael@0 1370 {
michael@0 1371 ptr += MINBPC(enc);
michael@0 1372 end -= MINBPC(enc);
michael@0 1373 for (; ptr != end; ptr += MINBPC(enc)) {
michael@0 1374 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1375 case BT_DIGIT:
michael@0 1376 case BT_HEX:
michael@0 1377 case BT_MINUS:
michael@0 1378 case BT_APOS:
michael@0 1379 case BT_LPAR:
michael@0 1380 case BT_RPAR:
michael@0 1381 case BT_PLUS:
michael@0 1382 case BT_COMMA:
michael@0 1383 case BT_SOL:
michael@0 1384 case BT_EQUALS:
michael@0 1385 case BT_QUEST:
michael@0 1386 case BT_CR:
michael@0 1387 case BT_LF:
michael@0 1388 case BT_SEMI:
michael@0 1389 case BT_EXCL:
michael@0 1390 case BT_AST:
michael@0 1391 case BT_PERCNT:
michael@0 1392 case BT_NUM:
michael@0 1393 #ifdef XML_NS
michael@0 1394 case BT_COLON:
michael@0 1395 #endif
michael@0 1396 break;
michael@0 1397 case BT_S:
michael@0 1398 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
michael@0 1399 *badPtr = ptr;
michael@0 1400 return 0;
michael@0 1401 }
michael@0 1402 break;
michael@0 1403 case BT_NAME:
michael@0 1404 case BT_NMSTRT:
michael@0 1405 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
michael@0 1406 break;
michael@0 1407 default:
michael@0 1408 switch (BYTE_TO_ASCII(enc, ptr)) {
michael@0 1409 case 0x24: /* $ */
michael@0 1410 case 0x40: /* @ */
michael@0 1411 break;
michael@0 1412 default:
michael@0 1413 *badPtr = ptr;
michael@0 1414 return 0;
michael@0 1415 }
michael@0 1416 break;
michael@0 1417 }
michael@0 1418 }
michael@0 1419 return 1;
michael@0 1420 }
michael@0 1421
michael@0 1422 /* This must only be called for a well-formed start-tag or empty
michael@0 1423 element tag. Returns the number of attributes. Pointers to the
michael@0 1424 first attsMax attributes are stored in atts.
michael@0 1425 */
michael@0 1426
michael@0 1427 static int PTRCALL
michael@0 1428 PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
michael@0 1429 int attsMax, ATTRIBUTE *atts)
michael@0 1430 {
michael@0 1431 enum { other, inName, inValue } state = inName;
michael@0 1432 int nAtts = 0;
michael@0 1433 int open = 0; /* defined when state == inValue;
michael@0 1434 initialization just to shut up compilers */
michael@0 1435
michael@0 1436 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
michael@0 1437 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1438 #define START_NAME \
michael@0 1439 if (state == other) { \
michael@0 1440 if (nAtts < attsMax) { \
michael@0 1441 atts[nAtts].name = ptr; \
michael@0 1442 atts[nAtts].normalized = 1; \
michael@0 1443 } \
michael@0 1444 state = inName; \
michael@0 1445 }
michael@0 1446 #define LEAD_CASE(n) \
michael@0 1447 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
michael@0 1448 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
michael@0 1449 #undef LEAD_CASE
michael@0 1450 case BT_NONASCII:
michael@0 1451 case BT_NMSTRT:
michael@0 1452 case BT_HEX:
michael@0 1453 START_NAME
michael@0 1454 break;
michael@0 1455 #undef START_NAME
michael@0 1456 case BT_QUOT:
michael@0 1457 if (state != inValue) {
michael@0 1458 if (nAtts < attsMax)
michael@0 1459 atts[nAtts].valuePtr = ptr + MINBPC(enc);
michael@0 1460 state = inValue;
michael@0 1461 open = BT_QUOT;
michael@0 1462 }
michael@0 1463 else if (open == BT_QUOT) {
michael@0 1464 state = other;
michael@0 1465 if (nAtts < attsMax)
michael@0 1466 atts[nAtts].valueEnd = ptr;
michael@0 1467 nAtts++;
michael@0 1468 }
michael@0 1469 break;
michael@0 1470 case BT_APOS:
michael@0 1471 if (state != inValue) {
michael@0 1472 if (nAtts < attsMax)
michael@0 1473 atts[nAtts].valuePtr = ptr + MINBPC(enc);
michael@0 1474 state = inValue;
michael@0 1475 open = BT_APOS;
michael@0 1476 }
michael@0 1477 else if (open == BT_APOS) {
michael@0 1478 state = other;
michael@0 1479 if (nAtts < attsMax)
michael@0 1480 atts[nAtts].valueEnd = ptr;
michael@0 1481 nAtts++;
michael@0 1482 }
michael@0 1483 break;
michael@0 1484 case BT_AMP:
michael@0 1485 if (nAtts < attsMax)
michael@0 1486 atts[nAtts].normalized = 0;
michael@0 1487 break;
michael@0 1488 case BT_S:
michael@0 1489 if (state == inName)
michael@0 1490 state = other;
michael@0 1491 else if (state == inValue
michael@0 1492 && nAtts < attsMax
michael@0 1493 && atts[nAtts].normalized
michael@0 1494 && (ptr == atts[nAtts].valuePtr
michael@0 1495 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
michael@0 1496 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
michael@0 1497 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
michael@0 1498 atts[nAtts].normalized = 0;
michael@0 1499 break;
michael@0 1500 case BT_CR: case BT_LF:
michael@0 1501 /* This case ensures that the first attribute name is counted
michael@0 1502 Apart from that we could just change state on the quote. */
michael@0 1503 if (state == inName)
michael@0 1504 state = other;
michael@0 1505 else if (state == inValue && nAtts < attsMax)
michael@0 1506 atts[nAtts].normalized = 0;
michael@0 1507 break;
michael@0 1508 case BT_GT:
michael@0 1509 case BT_SOL:
michael@0 1510 if (state != inValue)
michael@0 1511 return nAtts;
michael@0 1512 break;
michael@0 1513 default:
michael@0 1514 break;
michael@0 1515 }
michael@0 1516 }
michael@0 1517 /* not reached */
michael@0 1518 }
michael@0 1519
michael@0 1520 static int PTRFASTCALL
michael@0 1521 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
michael@0 1522 {
michael@0 1523 int result = 0;
michael@0 1524 /* skip &# */
michael@0 1525 ptr += 2*MINBPC(enc);
michael@0 1526 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
michael@0 1527 for (ptr += MINBPC(enc);
michael@0 1528 !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
michael@0 1529 ptr += MINBPC(enc)) {
michael@0 1530 int c = BYTE_TO_ASCII(enc, ptr);
michael@0 1531 switch (c) {
michael@0 1532 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
michael@0 1533 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
michael@0 1534 result <<= 4;
michael@0 1535 result |= (c - ASCII_0);
michael@0 1536 break;
michael@0 1537 case ASCII_A: case ASCII_B: case ASCII_C:
michael@0 1538 case ASCII_D: case ASCII_E: case ASCII_F:
michael@0 1539 result <<= 4;
michael@0 1540 result += 10 + (c - ASCII_A);
michael@0 1541 break;
michael@0 1542 case ASCII_a: case ASCII_b: case ASCII_c:
michael@0 1543 case ASCII_d: case ASCII_e: case ASCII_f:
michael@0 1544 result <<= 4;
michael@0 1545 result += 10 + (c - ASCII_a);
michael@0 1546 break;
michael@0 1547 }
michael@0 1548 if (result >= 0x110000)
michael@0 1549 return -1;
michael@0 1550 }
michael@0 1551 }
michael@0 1552 else {
michael@0 1553 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
michael@0 1554 int c = BYTE_TO_ASCII(enc, ptr);
michael@0 1555 result *= 10;
michael@0 1556 result += (c - ASCII_0);
michael@0 1557 if (result >= 0x110000)
michael@0 1558 return -1;
michael@0 1559 }
michael@0 1560 }
michael@0 1561 return checkCharRefNumber(result);
michael@0 1562 }
michael@0 1563
michael@0 1564 static int PTRCALL
michael@0 1565 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
michael@0 1566 const char *end)
michael@0 1567 {
michael@0 1568 switch ((end - ptr)/MINBPC(enc)) {
michael@0 1569 case 2:
michael@0 1570 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
michael@0 1571 switch (BYTE_TO_ASCII(enc, ptr)) {
michael@0 1572 case ASCII_l:
michael@0 1573 return ASCII_LT;
michael@0 1574 case ASCII_g:
michael@0 1575 return ASCII_GT;
michael@0 1576 }
michael@0 1577 }
michael@0 1578 break;
michael@0 1579 case 3:
michael@0 1580 if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
michael@0 1581 ptr += MINBPC(enc);
michael@0 1582 if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
michael@0 1583 ptr += MINBPC(enc);
michael@0 1584 if (CHAR_MATCHES(enc, ptr, ASCII_p))
michael@0 1585 return ASCII_AMP;
michael@0 1586 }
michael@0 1587 }
michael@0 1588 break;
michael@0 1589 case 4:
michael@0 1590 switch (BYTE_TO_ASCII(enc, ptr)) {
michael@0 1591 case ASCII_q:
michael@0 1592 ptr += MINBPC(enc);
michael@0 1593 if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
michael@0 1594 ptr += MINBPC(enc);
michael@0 1595 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
michael@0 1596 ptr += MINBPC(enc);
michael@0 1597 if (CHAR_MATCHES(enc, ptr, ASCII_t))
michael@0 1598 return ASCII_QUOT;
michael@0 1599 }
michael@0 1600 }
michael@0 1601 break;
michael@0 1602 case ASCII_a:
michael@0 1603 ptr += MINBPC(enc);
michael@0 1604 if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
michael@0 1605 ptr += MINBPC(enc);
michael@0 1606 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
michael@0 1607 ptr += MINBPC(enc);
michael@0 1608 if (CHAR_MATCHES(enc, ptr, ASCII_s))
michael@0 1609 return ASCII_APOS;
michael@0 1610 }
michael@0 1611 }
michael@0 1612 break;
michael@0 1613 }
michael@0 1614 }
michael@0 1615 return 0;
michael@0 1616 }
michael@0 1617
michael@0 1618 static int PTRCALL
michael@0 1619 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
michael@0 1620 {
michael@0 1621 for (;;) {
michael@0 1622 switch (BYTE_TYPE(enc, ptr1)) {
michael@0 1623 #define LEAD_CASE(n) \
michael@0 1624 case BT_LEAD ## n: \
michael@0 1625 if (*ptr1++ != *ptr2++) \
michael@0 1626 return 0;
michael@0 1627 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
michael@0 1628 #undef LEAD_CASE
michael@0 1629 /* fall through */
michael@0 1630 if (*ptr1++ != *ptr2++)
michael@0 1631 return 0;
michael@0 1632 break;
michael@0 1633 case BT_NONASCII:
michael@0 1634 case BT_NMSTRT:
michael@0 1635 #ifdef XML_NS
michael@0 1636 case BT_COLON:
michael@0 1637 #endif
michael@0 1638 case BT_HEX:
michael@0 1639 case BT_DIGIT:
michael@0 1640 case BT_NAME:
michael@0 1641 case BT_MINUS:
michael@0 1642 if (*ptr2++ != *ptr1++)
michael@0 1643 return 0;
michael@0 1644 if (MINBPC(enc) > 1) {
michael@0 1645 if (*ptr2++ != *ptr1++)
michael@0 1646 return 0;
michael@0 1647 if (MINBPC(enc) > 2) {
michael@0 1648 if (*ptr2++ != *ptr1++)
michael@0 1649 return 0;
michael@0 1650 if (MINBPC(enc) > 3) {
michael@0 1651 if (*ptr2++ != *ptr1++)
michael@0 1652 return 0;
michael@0 1653 }
michael@0 1654 }
michael@0 1655 }
michael@0 1656 break;
michael@0 1657 default:
michael@0 1658 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
michael@0 1659 return 1;
michael@0 1660 switch (BYTE_TYPE(enc, ptr2)) {
michael@0 1661 case BT_LEAD2:
michael@0 1662 case BT_LEAD3:
michael@0 1663 case BT_LEAD4:
michael@0 1664 case BT_NONASCII:
michael@0 1665 case BT_NMSTRT:
michael@0 1666 #ifdef XML_NS
michael@0 1667 case BT_COLON:
michael@0 1668 #endif
michael@0 1669 case BT_HEX:
michael@0 1670 case BT_DIGIT:
michael@0 1671 case BT_NAME:
michael@0 1672 case BT_MINUS:
michael@0 1673 return 0;
michael@0 1674 default:
michael@0 1675 return 1;
michael@0 1676 }
michael@0 1677 }
michael@0 1678 }
michael@0 1679 /* not reached */
michael@0 1680 }
michael@0 1681
michael@0 1682 static int PTRCALL
michael@0 1683 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
michael@0 1684 const char *end1, const char *ptr2)
michael@0 1685 {
michael@0 1686 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
michael@0 1687 if (ptr1 == end1)
michael@0 1688 return 0;
michael@0 1689 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
michael@0 1690 return 0;
michael@0 1691 }
michael@0 1692 return ptr1 == end1;
michael@0 1693 }
michael@0 1694
michael@0 1695 static int PTRFASTCALL
michael@0 1696 PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
michael@0 1697 {
michael@0 1698 const char *start = ptr;
michael@0 1699 for (;;) {
michael@0 1700 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1701 #define LEAD_CASE(n) \
michael@0 1702 case BT_LEAD ## n: ptr += n; break;
michael@0 1703 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
michael@0 1704 #undef LEAD_CASE
michael@0 1705 case BT_NONASCII:
michael@0 1706 case BT_NMSTRT:
michael@0 1707 #ifdef XML_NS
michael@0 1708 case BT_COLON:
michael@0 1709 #endif
michael@0 1710 case BT_HEX:
michael@0 1711 case BT_DIGIT:
michael@0 1712 case BT_NAME:
michael@0 1713 case BT_MINUS:
michael@0 1714 ptr += MINBPC(enc);
michael@0 1715 break;
michael@0 1716 default:
michael@0 1717 return (int)(ptr - start);
michael@0 1718 }
michael@0 1719 }
michael@0 1720 }
michael@0 1721
michael@0 1722 static const char * PTRFASTCALL
michael@0 1723 PREFIX(skipS)(const ENCODING *enc, const char *ptr)
michael@0 1724 {
michael@0 1725 for (;;) {
michael@0 1726 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1727 case BT_LF:
michael@0 1728 case BT_CR:
michael@0 1729 case BT_S:
michael@0 1730 ptr += MINBPC(enc);
michael@0 1731 break;
michael@0 1732 default:
michael@0 1733 return ptr;
michael@0 1734 }
michael@0 1735 }
michael@0 1736 }
michael@0 1737
michael@0 1738 static void PTRCALL
michael@0 1739 PREFIX(updatePosition)(const ENCODING *enc,
michael@0 1740 const char *ptr,
michael@0 1741 const char *end,
michael@0 1742 POSITION *pos)
michael@0 1743 {
michael@0 1744 while (ptr != end) {
michael@0 1745 switch (BYTE_TYPE(enc, ptr)) {
michael@0 1746 #define LEAD_CASE(n) \
michael@0 1747 case BT_LEAD ## n: \
michael@0 1748 ptr += n; \
michael@0 1749 break;
michael@0 1750 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
michael@0 1751 #undef LEAD_CASE
michael@0 1752 case BT_LF:
michael@0 1753 pos->columnNumber = (XML_Size)-1;
michael@0 1754 pos->lineNumber++;
michael@0 1755 ptr += MINBPC(enc);
michael@0 1756 break;
michael@0 1757 case BT_CR:
michael@0 1758 pos->lineNumber++;
michael@0 1759 ptr += MINBPC(enc);
michael@0 1760 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
michael@0 1761 ptr += MINBPC(enc);
michael@0 1762 pos->columnNumber = (XML_Size)-1;
michael@0 1763 break;
michael@0 1764 default:
michael@0 1765 ptr += MINBPC(enc);
michael@0 1766 break;
michael@0 1767 }
michael@0 1768 pos->columnNumber++;
michael@0 1769 }
michael@0 1770 }
michael@0 1771
/* Drop the helper macros used by the functions above (presumably so this
   file can be included again with a different PREFIX -- confirm against
   the including file).  Undefining a name that is not currently defined
   is harmless. */
#undef CHECK_NAME_CASE
#undef CHECK_NAME_CASES
#undef CHECK_NMSTRT_CASE
#undef CHECK_NMSTRT_CASES
#undef INVALID_CASES
#undef DO_LEAD_CASE
#undef MULTIBYTE_CASES
michael@0 1779

mercurial