Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
michael@0 | 1 | /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd |
michael@0 | 2 | See the file COPYING for copying permission. |
michael@0 | 3 | */ |
michael@0 | 4 | |
/* Fallback used when the including file has not supplied its own
   IS_INVALID_CHAR: no multi-byte sequence is ever reported as invalid. */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
michael@0 | 8 | |
/* Expands to the `case BT_LEAD<n>:` arm of a byte-type switch.

   The arm returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain
   before `end`, returns XML_TOK_INVALID (storing ptr in *nextTokPtr) when
   IS_INVALID_CHAR rejects the n-byte sequence, and otherwise advances ptr
   by n and breaks out of the switch.

   `enc` and `end` are not parameters: they are taken from the scope in
   which the macro is expanded.  `n` is handed to IS_INVALID_CHAR
   unparenthesized, exactly as written at the call site. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - (ptr) < (n)) \
        return XML_TOK_PARTIAL_CHAR; \
      if (IS_INVALID_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      (ptr) += (n); \
      break;
michael@0 | 19 | |
/* Expands to the switch arms shared by every scanner that must reject bad
   input: the BT_LEAD2/3/4 arms from INVALID_LEAD_CASE, followed by an arm
   for BT_NONXML, BT_MALFORM and BT_TRAIL that stores ptr in *nextTokPtr
   and returns XML_TOK_INVALID. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_NONXML: case BT_MALFORM: case BT_TRAIL: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
michael@0 | 29 | |
/* Expands to the `case BT_LEAD<n>:` arm used while scanning the inside of
   a name.

   The arm returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain
   before end, returns XML_TOK_INVALID (storing ptr in *nextTokPtr) when
   IS_NAME_CHAR rejects the n-byte sequence, and otherwise advances ptr by
   n and breaks out of the switch.  `n` is handed to IS_NAME_CHAR
   unparenthesized, exactly as written at the call site. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if ((end) - (ptr) < (n)) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NAME_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += (n); \
     break;
michael@0 | 40 | |
/* Expands to every switch arm that accepts one name character at ptr.

   BT_NONASCII is first vetted with IS_NAME_CHAR_MINBPC (XML_TOK_INVALID on
   failure) and then deliberately shares the advance with the single-unit
   byte types BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME and BT_MINUS, which move
   ptr forward by MINBPC(enc).  The BT_LEAD2/3/4 arms come from
   CHECK_NAME_CASE. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: case BT_HEX: case BT_DIGIT: \
  case BT_NAME: case BT_MINUS: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
michael@0 | 57 | |
/* Expands to the `case BT_LEAD<n>:` arm used for the first character of a
   name.

   The arm returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain
   before end, returns XML_TOK_INVALID (storing ptr in *nextTokPtr) when
   IS_NMSTRT_CHAR rejects the n-byte sequence, and otherwise advances ptr
   by n and breaks out of the switch.  `n` is handed to IS_NMSTRT_CHAR
   unparenthesized, exactly as written at the call site. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if ((end) - (ptr) < (n)) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += (n); \
     break;
michael@0 | 68 | |
/* Expands to every switch arm that accepts a name-start character at ptr.

   BT_NONASCII is first vetted with IS_NMSTRT_CHAR_MINBPC (XML_TOK_INVALID
   on failure) and then deliberately shares the advance with BT_NMSTRT and
   BT_HEX, which move ptr forward by MINBPC(enc).  The BT_LEAD2/3/4 arms
   come from CHECK_NMSTRT_CASE. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: case BT_HEX: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
michael@0 | 82 | |
/* Fallback used when the including file has not supplied its own PREFIX:
   function names are emitted unchanged. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
michael@0 | 86 | |
michael@0 | 87 | /* ptr points to character following "<!-" */ |
michael@0 | 88 | |
michael@0 | 89 | static int PTRCALL |
michael@0 | 90 | PREFIX(scanComment)(const ENCODING *enc, const char *ptr, |
michael@0 | 91 | const char *end, const char **nextTokPtr) |
michael@0 | 92 | { |
michael@0 | 93 | if (ptr != end) { |
michael@0 | 94 | if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
michael@0 | 95 | *nextTokPtr = ptr; |
michael@0 | 96 | return XML_TOK_INVALID; |
michael@0 | 97 | } |
michael@0 | 98 | ptr += MINBPC(enc); |
michael@0 | 99 | while (ptr != end) { |
michael@0 | 100 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 101 | INVALID_CASES(ptr, nextTokPtr) |
michael@0 | 102 | case BT_MINUS: |
michael@0 | 103 | if ((ptr += MINBPC(enc)) == end) |
michael@0 | 104 | return XML_TOK_PARTIAL; |
michael@0 | 105 | if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
michael@0 | 106 | if ((ptr += MINBPC(enc)) == end) |
michael@0 | 107 | return XML_TOK_PARTIAL; |
michael@0 | 108 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
michael@0 | 109 | *nextTokPtr = ptr; |
michael@0 | 110 | return XML_TOK_INVALID; |
michael@0 | 111 | } |
michael@0 | 112 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 113 | return XML_TOK_COMMENT; |
michael@0 | 114 | } |
michael@0 | 115 | break; |
michael@0 | 116 | default: |
michael@0 | 117 | ptr += MINBPC(enc); |
michael@0 | 118 | break; |
michael@0 | 119 | } |
michael@0 | 120 | } |
michael@0 | 121 | } |
michael@0 | 122 | return XML_TOK_PARTIAL; |
michael@0 | 123 | } |
michael@0 | 124 | |
michael@0 | 125 | /* ptr points to character following "<!" */ |
michael@0 | 126 | |
michael@0 | 127 | static int PTRCALL |
michael@0 | 128 | PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, |
michael@0 | 129 | const char *end, const char **nextTokPtr) |
michael@0 | 130 | { |
michael@0 | 131 | if (ptr == end) |
michael@0 | 132 | return XML_TOK_PARTIAL; |
michael@0 | 133 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 134 | case BT_MINUS: |
michael@0 | 135 | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 136 | case BT_LSQB: |
michael@0 | 137 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 138 | return XML_TOK_COND_SECT_OPEN; |
michael@0 | 139 | case BT_NMSTRT: |
michael@0 | 140 | case BT_HEX: |
michael@0 | 141 | ptr += MINBPC(enc); |
michael@0 | 142 | break; |
michael@0 | 143 | default: |
michael@0 | 144 | *nextTokPtr = ptr; |
michael@0 | 145 | return XML_TOK_INVALID; |
michael@0 | 146 | } |
michael@0 | 147 | while (ptr != end) { |
michael@0 | 148 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 149 | case BT_PERCNT: |
michael@0 | 150 | if (ptr + MINBPC(enc) == end) |
michael@0 | 151 | return XML_TOK_PARTIAL; |
michael@0 | 152 | /* don't allow <!ENTITY% foo "whatever"> */ |
michael@0 | 153 | switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { |
michael@0 | 154 | case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: |
michael@0 | 155 | *nextTokPtr = ptr; |
michael@0 | 156 | return XML_TOK_INVALID; |
michael@0 | 157 | } |
michael@0 | 158 | /* fall through */ |
michael@0 | 159 | case BT_S: case BT_CR: case BT_LF: |
michael@0 | 160 | *nextTokPtr = ptr; |
michael@0 | 161 | return XML_TOK_DECL_OPEN; |
michael@0 | 162 | case BT_NMSTRT: |
michael@0 | 163 | case BT_HEX: |
michael@0 | 164 | ptr += MINBPC(enc); |
michael@0 | 165 | break; |
michael@0 | 166 | default: |
michael@0 | 167 | *nextTokPtr = ptr; |
michael@0 | 168 | return XML_TOK_INVALID; |
michael@0 | 169 | } |
michael@0 | 170 | } |
michael@0 | 171 | return XML_TOK_PARTIAL; |
michael@0 | 172 | } |
michael@0 | 173 | |
michael@0 | 174 | static int PTRCALL |
michael@0 | 175 | PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, |
michael@0 | 176 | const char *end, int *tokPtr) |
michael@0 | 177 | { |
michael@0 | 178 | int upper = 0; |
michael@0 | 179 | *tokPtr = XML_TOK_PI; |
michael@0 | 180 | if (end - ptr != MINBPC(enc)*3) |
michael@0 | 181 | return 1; |
michael@0 | 182 | switch (BYTE_TO_ASCII(enc, ptr)) { |
michael@0 | 183 | case ASCII_x: |
michael@0 | 184 | break; |
michael@0 | 185 | case ASCII_X: |
michael@0 | 186 | upper = 1; |
michael@0 | 187 | break; |
michael@0 | 188 | default: |
michael@0 | 189 | return 1; |
michael@0 | 190 | } |
michael@0 | 191 | ptr += MINBPC(enc); |
michael@0 | 192 | switch (BYTE_TO_ASCII(enc, ptr)) { |
michael@0 | 193 | case ASCII_m: |
michael@0 | 194 | break; |
michael@0 | 195 | case ASCII_M: |
michael@0 | 196 | upper = 1; |
michael@0 | 197 | break; |
michael@0 | 198 | default: |
michael@0 | 199 | return 1; |
michael@0 | 200 | } |
michael@0 | 201 | ptr += MINBPC(enc); |
michael@0 | 202 | switch (BYTE_TO_ASCII(enc, ptr)) { |
michael@0 | 203 | case ASCII_l: |
michael@0 | 204 | break; |
michael@0 | 205 | case ASCII_L: |
michael@0 | 206 | upper = 1; |
michael@0 | 207 | break; |
michael@0 | 208 | default: |
michael@0 | 209 | return 1; |
michael@0 | 210 | } |
michael@0 | 211 | if (upper) |
michael@0 | 212 | return 0; |
michael@0 | 213 | *tokPtr = XML_TOK_XML_DECL; |
michael@0 | 214 | return 1; |
michael@0 | 215 | } |
michael@0 | 216 | |
michael@0 | 217 | /* ptr points to character following "<?" */ |
michael@0 | 218 | |
michael@0 | 219 | static int PTRCALL |
michael@0 | 220 | PREFIX(scanPi)(const ENCODING *enc, const char *ptr, |
michael@0 | 221 | const char *end, const char **nextTokPtr) |
michael@0 | 222 | { |
michael@0 | 223 | int tok; |
michael@0 | 224 | const char *target = ptr; |
michael@0 | 225 | if (ptr == end) |
michael@0 | 226 | return XML_TOK_PARTIAL; |
michael@0 | 227 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 228 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 229 | default: |
michael@0 | 230 | *nextTokPtr = ptr; |
michael@0 | 231 | return XML_TOK_INVALID; |
michael@0 | 232 | } |
michael@0 | 233 | while (ptr != end) { |
michael@0 | 234 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 235 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 236 | case BT_S: case BT_CR: case BT_LF: |
michael@0 | 237 | if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
michael@0 | 238 | *nextTokPtr = ptr; |
michael@0 | 239 | return XML_TOK_INVALID; |
michael@0 | 240 | } |
michael@0 | 241 | ptr += MINBPC(enc); |
michael@0 | 242 | while (ptr != end) { |
michael@0 | 243 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 244 | INVALID_CASES(ptr, nextTokPtr) |
michael@0 | 245 | case BT_QUEST: |
michael@0 | 246 | ptr += MINBPC(enc); |
michael@0 | 247 | if (ptr == end) |
michael@0 | 248 | return XML_TOK_PARTIAL; |
michael@0 | 249 | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
michael@0 | 250 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 251 | return tok; |
michael@0 | 252 | } |
michael@0 | 253 | break; |
michael@0 | 254 | default: |
michael@0 | 255 | ptr += MINBPC(enc); |
michael@0 | 256 | break; |
michael@0 | 257 | } |
michael@0 | 258 | } |
michael@0 | 259 | return XML_TOK_PARTIAL; |
michael@0 | 260 | case BT_QUEST: |
michael@0 | 261 | if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
michael@0 | 262 | *nextTokPtr = ptr; |
michael@0 | 263 | return XML_TOK_INVALID; |
michael@0 | 264 | } |
michael@0 | 265 | ptr += MINBPC(enc); |
michael@0 | 266 | if (ptr == end) |
michael@0 | 267 | return XML_TOK_PARTIAL; |
michael@0 | 268 | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
michael@0 | 269 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 270 | return tok; |
michael@0 | 271 | } |
michael@0 | 272 | /* fall through */ |
michael@0 | 273 | default: |
michael@0 | 274 | *nextTokPtr = ptr; |
michael@0 | 275 | return XML_TOK_INVALID; |
michael@0 | 276 | } |
michael@0 | 277 | } |
michael@0 | 278 | return XML_TOK_PARTIAL; |
michael@0 | 279 | } |
michael@0 | 280 | |
michael@0 | 281 | static int PTRCALL |
michael@0 | 282 | PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, |
michael@0 | 283 | const char *end, const char **nextTokPtr) |
michael@0 | 284 | { |
michael@0 | 285 | static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, |
michael@0 | 286 | ASCII_T, ASCII_A, ASCII_LSQB }; |
michael@0 | 287 | int i; |
michael@0 | 288 | /* CDATA[ */ |
michael@0 | 289 | if (end - ptr < 6 * MINBPC(enc)) |
michael@0 | 290 | return XML_TOK_PARTIAL; |
michael@0 | 291 | for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { |
michael@0 | 292 | if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { |
michael@0 | 293 | *nextTokPtr = ptr; |
michael@0 | 294 | return XML_TOK_INVALID; |
michael@0 | 295 | } |
michael@0 | 296 | } |
michael@0 | 297 | *nextTokPtr = ptr; |
michael@0 | 298 | return XML_TOK_CDATA_SECT_OPEN; |
michael@0 | 299 | } |
michael@0 | 300 | |
michael@0 | 301 | static int PTRCALL |
michael@0 | 302 | PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, |
michael@0 | 303 | const char *end, const char **nextTokPtr) |
michael@0 | 304 | { |
michael@0 | 305 | if (ptr == end) |
michael@0 | 306 | return XML_TOK_NONE; |
michael@0 | 307 | if (MINBPC(enc) > 1) { |
michael@0 | 308 | size_t n = end - ptr; |
michael@0 | 309 | if (n & (MINBPC(enc) - 1)) { |
michael@0 | 310 | n &= ~(MINBPC(enc) - 1); |
michael@0 | 311 | if (n == 0) |
michael@0 | 312 | return XML_TOK_PARTIAL; |
michael@0 | 313 | end = ptr + n; |
michael@0 | 314 | } |
michael@0 | 315 | } |
michael@0 | 316 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 317 | case BT_RSQB: |
michael@0 | 318 | ptr += MINBPC(enc); |
michael@0 | 319 | if (ptr == end) |
michael@0 | 320 | return XML_TOK_PARTIAL; |
michael@0 | 321 | if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
michael@0 | 322 | break; |
michael@0 | 323 | ptr += MINBPC(enc); |
michael@0 | 324 | if (ptr == end) |
michael@0 | 325 | return XML_TOK_PARTIAL; |
michael@0 | 326 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
michael@0 | 327 | ptr -= MINBPC(enc); |
michael@0 | 328 | break; |
michael@0 | 329 | } |
michael@0 | 330 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 331 | return XML_TOK_CDATA_SECT_CLOSE; |
michael@0 | 332 | case BT_CR: |
michael@0 | 333 | ptr += MINBPC(enc); |
michael@0 | 334 | if (ptr == end) |
michael@0 | 335 | return XML_TOK_PARTIAL; |
michael@0 | 336 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
michael@0 | 337 | ptr += MINBPC(enc); |
michael@0 | 338 | *nextTokPtr = ptr; |
michael@0 | 339 | return XML_TOK_DATA_NEWLINE; |
michael@0 | 340 | case BT_LF: |
michael@0 | 341 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 342 | return XML_TOK_DATA_NEWLINE; |
michael@0 | 343 | INVALID_CASES(ptr, nextTokPtr) |
michael@0 | 344 | default: |
michael@0 | 345 | ptr += MINBPC(enc); |
michael@0 | 346 | break; |
michael@0 | 347 | } |
michael@0 | 348 | while (ptr != end) { |
michael@0 | 349 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 350 | #define LEAD_CASE(n) \ |
michael@0 | 351 | case BT_LEAD ## n: \ |
michael@0 | 352 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
michael@0 | 353 | *nextTokPtr = ptr; \ |
michael@0 | 354 | return XML_TOK_DATA_CHARS; \ |
michael@0 | 355 | } \ |
michael@0 | 356 | ptr += n; \ |
michael@0 | 357 | break; |
michael@0 | 358 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
michael@0 | 359 | #undef LEAD_CASE |
michael@0 | 360 | case BT_NONXML: |
michael@0 | 361 | case BT_MALFORM: |
michael@0 | 362 | case BT_TRAIL: |
michael@0 | 363 | case BT_CR: |
michael@0 | 364 | case BT_LF: |
michael@0 | 365 | case BT_RSQB: |
michael@0 | 366 | *nextTokPtr = ptr; |
michael@0 | 367 | return XML_TOK_DATA_CHARS; |
michael@0 | 368 | default: |
michael@0 | 369 | ptr += MINBPC(enc); |
michael@0 | 370 | break; |
michael@0 | 371 | } |
michael@0 | 372 | } |
michael@0 | 373 | *nextTokPtr = ptr; |
michael@0 | 374 | return XML_TOK_DATA_CHARS; |
michael@0 | 375 | } |
michael@0 | 376 | |
michael@0 | 377 | /* ptr points to character following "</" */ |
michael@0 | 378 | |
michael@0 | 379 | static int PTRCALL |
michael@0 | 380 | PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, |
michael@0 | 381 | const char *end, const char **nextTokPtr) |
michael@0 | 382 | { |
michael@0 | 383 | if (ptr == end) |
michael@0 | 384 | return XML_TOK_PARTIAL; |
michael@0 | 385 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 386 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 387 | default: |
michael@0 | 388 | *nextTokPtr = ptr; |
michael@0 | 389 | return XML_TOK_INVALID; |
michael@0 | 390 | } |
michael@0 | 391 | while (ptr != end) { |
michael@0 | 392 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 393 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 394 | case BT_S: case BT_CR: case BT_LF: |
michael@0 | 395 | for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
michael@0 | 396 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 397 | case BT_S: case BT_CR: case BT_LF: |
michael@0 | 398 | break; |
michael@0 | 399 | case BT_GT: |
michael@0 | 400 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 401 | return XML_TOK_END_TAG; |
michael@0 | 402 | default: |
michael@0 | 403 | *nextTokPtr = ptr; |
michael@0 | 404 | return XML_TOK_INVALID; |
michael@0 | 405 | } |
michael@0 | 406 | } |
michael@0 | 407 | return XML_TOK_PARTIAL; |
michael@0 | 408 | #ifdef XML_NS |
michael@0 | 409 | case BT_COLON: |
michael@0 | 410 | /* no need to check qname syntax here, |
michael@0 | 411 | since end-tag must match exactly */ |
michael@0 | 412 | ptr += MINBPC(enc); |
michael@0 | 413 | break; |
michael@0 | 414 | #endif |
michael@0 | 415 | case BT_GT: |
michael@0 | 416 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 417 | return XML_TOK_END_TAG; |
michael@0 | 418 | default: |
michael@0 | 419 | *nextTokPtr = ptr; |
michael@0 | 420 | return XML_TOK_INVALID; |
michael@0 | 421 | } |
michael@0 | 422 | } |
michael@0 | 423 | return XML_TOK_PARTIAL; |
michael@0 | 424 | } |
michael@0 | 425 | |
michael@0 | 426 | /* ptr points to character following "&#X" */ |
michael@0 | 427 | |
michael@0 | 428 | static int PTRCALL |
michael@0 | 429 | PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, |
michael@0 | 430 | const char *end, const char **nextTokPtr) |
michael@0 | 431 | { |
michael@0 | 432 | if (ptr != end) { |
michael@0 | 433 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 434 | case BT_DIGIT: |
michael@0 | 435 | case BT_HEX: |
michael@0 | 436 | break; |
michael@0 | 437 | default: |
michael@0 | 438 | *nextTokPtr = ptr; |
michael@0 | 439 | return XML_TOK_INVALID; |
michael@0 | 440 | } |
michael@0 | 441 | for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
michael@0 | 442 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 443 | case BT_DIGIT: |
michael@0 | 444 | case BT_HEX: |
michael@0 | 445 | break; |
michael@0 | 446 | case BT_SEMI: |
michael@0 | 447 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 448 | return XML_TOK_CHAR_REF; |
michael@0 | 449 | default: |
michael@0 | 450 | *nextTokPtr = ptr; |
michael@0 | 451 | return XML_TOK_INVALID; |
michael@0 | 452 | } |
michael@0 | 453 | } |
michael@0 | 454 | } |
michael@0 | 455 | return XML_TOK_PARTIAL; |
michael@0 | 456 | } |
michael@0 | 457 | |
michael@0 | 458 | /* ptr points to character following "&#" */ |
michael@0 | 459 | |
michael@0 | 460 | static int PTRCALL |
michael@0 | 461 | PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, |
michael@0 | 462 | const char *end, const char **nextTokPtr) |
michael@0 | 463 | { |
michael@0 | 464 | if (ptr != end) { |
michael@0 | 465 | if (CHAR_MATCHES(enc, ptr, ASCII_x)) |
michael@0 | 466 | return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 467 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 468 | case BT_DIGIT: |
michael@0 | 469 | break; |
michael@0 | 470 | default: |
michael@0 | 471 | *nextTokPtr = ptr; |
michael@0 | 472 | return XML_TOK_INVALID; |
michael@0 | 473 | } |
michael@0 | 474 | for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
michael@0 | 475 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 476 | case BT_DIGIT: |
michael@0 | 477 | break; |
michael@0 | 478 | case BT_SEMI: |
michael@0 | 479 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 480 | return XML_TOK_CHAR_REF; |
michael@0 | 481 | default: |
michael@0 | 482 | *nextTokPtr = ptr; |
michael@0 | 483 | return XML_TOK_INVALID; |
michael@0 | 484 | } |
michael@0 | 485 | } |
michael@0 | 486 | } |
michael@0 | 487 | return XML_TOK_PARTIAL; |
michael@0 | 488 | } |
michael@0 | 489 | |
michael@0 | 490 | /* ptr points to character following "&" */ |
michael@0 | 491 | |
michael@0 | 492 | static int PTRCALL |
michael@0 | 493 | PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, |
michael@0 | 494 | const char **nextTokPtr) |
michael@0 | 495 | { |
michael@0 | 496 | if (ptr == end) |
michael@0 | 497 | return XML_TOK_PARTIAL; |
michael@0 | 498 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 499 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 500 | case BT_NUM: |
michael@0 | 501 | return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 502 | default: |
michael@0 | 503 | *nextTokPtr = ptr; |
michael@0 | 504 | return XML_TOK_INVALID; |
michael@0 | 505 | } |
michael@0 | 506 | while (ptr != end) { |
michael@0 | 507 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 508 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 509 | case BT_SEMI: |
michael@0 | 510 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 511 | return XML_TOK_ENTITY_REF; |
michael@0 | 512 | default: |
michael@0 | 513 | *nextTokPtr = ptr; |
michael@0 | 514 | return XML_TOK_INVALID; |
michael@0 | 515 | } |
michael@0 | 516 | } |
michael@0 | 517 | return XML_TOK_PARTIAL; |
michael@0 | 518 | } |
michael@0 | 519 | |
michael@0 | 520 | /* ptr points to character following first character of attribute name */ |
michael@0 | 521 | |
michael@0 | 522 | static int PTRCALL |
michael@0 | 523 | PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, |
michael@0 | 524 | const char **nextTokPtr) |
michael@0 | 525 | { |
michael@0 | 526 | #ifdef XML_NS |
michael@0 | 527 | int hadColon = 0; |
michael@0 | 528 | #endif |
michael@0 | 529 | while (ptr != end) { |
michael@0 | 530 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 531 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 532 | #ifdef XML_NS |
michael@0 | 533 | case BT_COLON: |
michael@0 | 534 | if (hadColon) { |
michael@0 | 535 | *nextTokPtr = ptr; |
michael@0 | 536 | return XML_TOK_INVALID; |
michael@0 | 537 | } |
michael@0 | 538 | hadColon = 1; |
michael@0 | 539 | ptr += MINBPC(enc); |
michael@0 | 540 | if (ptr == end) |
michael@0 | 541 | return XML_TOK_PARTIAL; |
michael@0 | 542 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 543 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 544 | default: |
michael@0 | 545 | *nextTokPtr = ptr; |
michael@0 | 546 | return XML_TOK_INVALID; |
michael@0 | 547 | } |
michael@0 | 548 | break; |
michael@0 | 549 | #endif |
michael@0 | 550 | case BT_S: case BT_CR: case BT_LF: |
michael@0 | 551 | for (;;) { |
michael@0 | 552 | int t; |
michael@0 | 553 | |
michael@0 | 554 | ptr += MINBPC(enc); |
michael@0 | 555 | if (ptr == end) |
michael@0 | 556 | return XML_TOK_PARTIAL; |
michael@0 | 557 | t = BYTE_TYPE(enc, ptr); |
michael@0 | 558 | if (t == BT_EQUALS) |
michael@0 | 559 | break; |
michael@0 | 560 | switch (t) { |
michael@0 | 561 | case BT_S: |
michael@0 | 562 | case BT_LF: |
michael@0 | 563 | case BT_CR: |
michael@0 | 564 | break; |
michael@0 | 565 | default: |
michael@0 | 566 | *nextTokPtr = ptr; |
michael@0 | 567 | return XML_TOK_INVALID; |
michael@0 | 568 | } |
michael@0 | 569 | } |
michael@0 | 570 | /* fall through */ |
michael@0 | 571 | case BT_EQUALS: |
michael@0 | 572 | { |
michael@0 | 573 | int open; |
michael@0 | 574 | #ifdef XML_NS |
michael@0 | 575 | hadColon = 0; |
michael@0 | 576 | #endif |
michael@0 | 577 | for (;;) { |
michael@0 | 578 | ptr += MINBPC(enc); |
michael@0 | 579 | if (ptr == end) |
michael@0 | 580 | return XML_TOK_PARTIAL; |
michael@0 | 581 | open = BYTE_TYPE(enc, ptr); |
michael@0 | 582 | if (open == BT_QUOT || open == BT_APOS) |
michael@0 | 583 | break; |
michael@0 | 584 | switch (open) { |
michael@0 | 585 | case BT_S: |
michael@0 | 586 | case BT_LF: |
michael@0 | 587 | case BT_CR: |
michael@0 | 588 | break; |
michael@0 | 589 | default: |
michael@0 | 590 | *nextTokPtr = ptr; |
michael@0 | 591 | return XML_TOK_INVALID; |
michael@0 | 592 | } |
michael@0 | 593 | } |
michael@0 | 594 | ptr += MINBPC(enc); |
michael@0 | 595 | /* in attribute value */ |
michael@0 | 596 | for (;;) { |
michael@0 | 597 | int t; |
michael@0 | 598 | if (ptr == end) |
michael@0 | 599 | return XML_TOK_PARTIAL; |
michael@0 | 600 | t = BYTE_TYPE(enc, ptr); |
michael@0 | 601 | if (t == open) |
michael@0 | 602 | break; |
michael@0 | 603 | switch (t) { |
michael@0 | 604 | INVALID_CASES(ptr, nextTokPtr) |
michael@0 | 605 | case BT_AMP: |
michael@0 | 606 | { |
michael@0 | 607 | int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); |
michael@0 | 608 | if (tok <= 0) { |
michael@0 | 609 | if (tok == XML_TOK_INVALID) |
michael@0 | 610 | *nextTokPtr = ptr; |
michael@0 | 611 | return tok; |
michael@0 | 612 | } |
michael@0 | 613 | break; |
michael@0 | 614 | } |
michael@0 | 615 | case BT_LT: |
michael@0 | 616 | *nextTokPtr = ptr; |
michael@0 | 617 | return XML_TOK_INVALID; |
michael@0 | 618 | default: |
michael@0 | 619 | ptr += MINBPC(enc); |
michael@0 | 620 | break; |
michael@0 | 621 | } |
michael@0 | 622 | } |
michael@0 | 623 | ptr += MINBPC(enc); |
michael@0 | 624 | if (ptr == end) |
michael@0 | 625 | return XML_TOK_PARTIAL; |
michael@0 | 626 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 627 | case BT_S: |
michael@0 | 628 | case BT_CR: |
michael@0 | 629 | case BT_LF: |
michael@0 | 630 | break; |
michael@0 | 631 | case BT_SOL: |
michael@0 | 632 | goto sol; |
michael@0 | 633 | case BT_GT: |
michael@0 | 634 | goto gt; |
michael@0 | 635 | default: |
michael@0 | 636 | *nextTokPtr = ptr; |
michael@0 | 637 | return XML_TOK_INVALID; |
michael@0 | 638 | } |
michael@0 | 639 | /* ptr points to closing quote */ |
michael@0 | 640 | for (;;) { |
michael@0 | 641 | ptr += MINBPC(enc); |
michael@0 | 642 | if (ptr == end) |
michael@0 | 643 | return XML_TOK_PARTIAL; |
michael@0 | 644 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 645 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 646 | case BT_S: case BT_CR: case BT_LF: |
michael@0 | 647 | continue; |
michael@0 | 648 | case BT_GT: |
michael@0 | 649 | gt: |
michael@0 | 650 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 651 | return XML_TOK_START_TAG_WITH_ATTS; |
michael@0 | 652 | case BT_SOL: |
michael@0 | 653 | sol: |
michael@0 | 654 | ptr += MINBPC(enc); |
michael@0 | 655 | if (ptr == end) |
michael@0 | 656 | return XML_TOK_PARTIAL; |
michael@0 | 657 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
michael@0 | 658 | *nextTokPtr = ptr; |
michael@0 | 659 | return XML_TOK_INVALID; |
michael@0 | 660 | } |
michael@0 | 661 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 662 | return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; |
michael@0 | 663 | default: |
michael@0 | 664 | *nextTokPtr = ptr; |
michael@0 | 665 | return XML_TOK_INVALID; |
michael@0 | 666 | } |
michael@0 | 667 | break; |
michael@0 | 668 | } |
michael@0 | 669 | break; |
michael@0 | 670 | } |
michael@0 | 671 | default: |
michael@0 | 672 | *nextTokPtr = ptr; |
michael@0 | 673 | return XML_TOK_INVALID; |
michael@0 | 674 | } |
michael@0 | 675 | } |
michael@0 | 676 | return XML_TOK_PARTIAL; |
michael@0 | 677 | } |
michael@0 | 678 | |
michael@0 | 679 | /* ptr points to character following "<" */ |
michael@0 | 680 | |
michael@0 | 681 | static int PTRCALL |
michael@0 | 682 | PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, |
michael@0 | 683 | const char **nextTokPtr) |
michael@0 | 684 | { |
michael@0 | 685 | #ifdef XML_NS |
michael@0 | 686 | int hadColon; |
michael@0 | 687 | #endif |
michael@0 | 688 | if (ptr == end) |
michael@0 | 689 | return XML_TOK_PARTIAL; |
michael@0 | 690 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 691 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 692 | case BT_EXCL: |
michael@0 | 693 | if ((ptr += MINBPC(enc)) == end) |
michael@0 | 694 | return XML_TOK_PARTIAL; |
michael@0 | 695 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 696 | case BT_MINUS: |
michael@0 | 697 | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 698 | case BT_LSQB: |
michael@0 | 699 | return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), |
michael@0 | 700 | end, nextTokPtr); |
michael@0 | 701 | } |
michael@0 | 702 | *nextTokPtr = ptr; |
michael@0 | 703 | return XML_TOK_INVALID; |
michael@0 | 704 | case BT_QUEST: |
michael@0 | 705 | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 706 | case BT_SOL: |
michael@0 | 707 | return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 708 | default: |
michael@0 | 709 | *nextTokPtr = ptr; |
michael@0 | 710 | return XML_TOK_INVALID; |
michael@0 | 711 | } |
michael@0 | 712 | #ifdef XML_NS |
michael@0 | 713 | hadColon = 0; |
michael@0 | 714 | #endif |
michael@0 | 715 | /* we have a start-tag */ |
michael@0 | 716 | while (ptr != end) { |
michael@0 | 717 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 718 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 719 | #ifdef XML_NS |
michael@0 | 720 | case BT_COLON: |
michael@0 | 721 | if (hadColon) { |
michael@0 | 722 | *nextTokPtr = ptr; |
michael@0 | 723 | return XML_TOK_INVALID; |
michael@0 | 724 | } |
michael@0 | 725 | hadColon = 1; |
michael@0 | 726 | ptr += MINBPC(enc); |
michael@0 | 727 | if (ptr == end) |
michael@0 | 728 | return XML_TOK_PARTIAL; |
michael@0 | 729 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 730 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 731 | default: |
michael@0 | 732 | *nextTokPtr = ptr; |
michael@0 | 733 | return XML_TOK_INVALID; |
michael@0 | 734 | } |
michael@0 | 735 | break; |
michael@0 | 736 | #endif |
michael@0 | 737 | case BT_S: case BT_CR: case BT_LF: |
michael@0 | 738 | { |
michael@0 | 739 | ptr += MINBPC(enc); |
michael@0 | 740 | while (ptr != end) { |
michael@0 | 741 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 742 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 743 | case BT_GT: |
michael@0 | 744 | goto gt; |
michael@0 | 745 | case BT_SOL: |
michael@0 | 746 | goto sol; |
michael@0 | 747 | case BT_S: case BT_CR: case BT_LF: |
michael@0 | 748 | ptr += MINBPC(enc); |
michael@0 | 749 | continue; |
michael@0 | 750 | default: |
michael@0 | 751 | *nextTokPtr = ptr; |
michael@0 | 752 | return XML_TOK_INVALID; |
michael@0 | 753 | } |
michael@0 | 754 | return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); |
michael@0 | 755 | } |
michael@0 | 756 | return XML_TOK_PARTIAL; |
michael@0 | 757 | } |
michael@0 | 758 | case BT_GT: |
michael@0 | 759 | gt: |
michael@0 | 760 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 761 | return XML_TOK_START_TAG_NO_ATTS; |
michael@0 | 762 | case BT_SOL: |
michael@0 | 763 | sol: |
michael@0 | 764 | ptr += MINBPC(enc); |
michael@0 | 765 | if (ptr == end) |
michael@0 | 766 | return XML_TOK_PARTIAL; |
michael@0 | 767 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
michael@0 | 768 | *nextTokPtr = ptr; |
michael@0 | 769 | return XML_TOK_INVALID; |
michael@0 | 770 | } |
michael@0 | 771 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 772 | return XML_TOK_EMPTY_ELEMENT_NO_ATTS; |
michael@0 | 773 | default: |
michael@0 | 774 | *nextTokPtr = ptr; |
michael@0 | 775 | return XML_TOK_INVALID; |
michael@0 | 776 | } |
michael@0 | 777 | } |
michael@0 | 778 | return XML_TOK_PARTIAL; |
michael@0 | 779 | } |
michael@0 | 780 | |
/* Scans one token of element content from [ptr, end).

   Returns XML_TOK_NONE when the range is empty.  Otherwise the first
   character decides the token:
     "<"   -> delegated to scanLt
     "&"   -> delegated to scanRef
     CR/LF -> XML_TOK_DATA_NEWLINE (a CR immediately followed by LF is one
              newline); a CR that is the last character of the input gives
              XML_TOK_TRAILING_CR
     "]"   -> XML_TOK_TRAILING_RSQB if the input ends after "]" or "]]";
              XML_TOK_INVALID if it starts "]]>"; otherwise treated as data
     anything else valid -> a run of character data, XML_TOK_DATA_CHARS

   On XML_TOK_DATA_NEWLINE, XML_TOK_DATA_CHARS and XML_TOK_INVALID,
   *nextTokPtr is set to where scanning stopped.
*/
michael@0 | 781 | static int PTRCALL |
michael@0 | 782 | PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, |
michael@0 | 783 | const char **nextTokPtr) |
michael@0 | 784 | { |
michael@0 | 785 | if (ptr == end) |
michael@0 | 786 | return XML_TOK_NONE; |
/* When a character needs more than one byte, round the usable length down
   to a whole number of MINBPC(enc)-byte units (the mask arithmetic assumes
   MINBPC is a power of two) so the scan never looks at a trailing partial
   unit.  If not even one whole unit is available, report XML_TOK_PARTIAL. */
michael@0 | 787 | if (MINBPC(enc) > 1) { |
michael@0 | 788 | size_t n = end - ptr; |
michael@0 | 789 | if (n & (MINBPC(enc) - 1)) { |
michael@0 | 790 | n &= ~(MINBPC(enc) - 1); |
michael@0 | 791 | if (n == 0) |
michael@0 | 792 | return XML_TOK_PARTIAL; |
michael@0 | 793 | end = ptr + n; |
michael@0 | 794 | } |
michael@0 | 795 | } |
/* Classify the first character.  Cases that "break" out of this switch
   have consumed the start of a data run and continue in the loop below. */
michael@0 | 796 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 797 | case BT_LT: |
michael@0 | 798 | return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 799 | case BT_AMP: |
michael@0 | 800 | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 801 | case BT_CR: |
michael@0 | 802 | ptr += MINBPC(enc); |
michael@0 | 803 | if (ptr == end) |
michael@0 | 804 | return XML_TOK_TRAILING_CR; |
michael@0 | 805 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
michael@0 | 806 | ptr += MINBPC(enc); |
michael@0 | 807 | *nextTokPtr = ptr; |
michael@0 | 808 | return XML_TOK_DATA_NEWLINE; |
michael@0 | 809 | case BT_LF: |
michael@0 | 810 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 811 | return XML_TOK_DATA_NEWLINE; |
michael@0 | 812 | case BT_RSQB: |
/* A leading "]" needs up to two characters of lookahead to rule out "]]>". */
michael@0 | 813 | ptr += MINBPC(enc); |
michael@0 | 814 | if (ptr == end) |
michael@0 | 815 | return XML_TOK_TRAILING_RSQB; |
michael@0 | 816 | if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
michael@0 | 817 | break; |
michael@0 | 818 | ptr += MINBPC(enc); |
michael@0 | 819 | if (ptr == end) |
michael@0 | 820 | return XML_TOK_TRAILING_RSQB; |
michael@0 | 821 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
/* "]]" not followed by ">": only the first "]" is consumed as data; step
   back so the second "]" is re-examined by the loop below. */
michael@0 | 822 | ptr -= MINBPC(enc); |
michael@0 | 823 | break; |
michael@0 | 824 | } |
/* "]]>" in content is rejected; ptr addresses the ">" here. */
michael@0 | 825 | *nextTokPtr = ptr; |
michael@0 | 826 | return XML_TOK_INVALID; |
michael@0 | 827 | INVALID_CASES(ptr, nextTokPtr) |
michael@0 | 828 | default: |
michael@0 | 829 | ptr += MINBPC(enc); |
michael@0 | 830 | break; |
michael@0 | 831 | } |
/* Extend the data run until a character that must start its own token
   (or the end of the input) is reached. */
michael@0 | 832 | while (ptr != end) { |
michael@0 | 833 | switch (BYTE_TYPE(enc, ptr)) { |
/* A truncated or invalid multi-byte sequence ends the run just before it;
   the data scanned so far is still returned as XML_TOK_DATA_CHARS. */
michael@0 | 834 | #define LEAD_CASE(n) \ |
michael@0 | 835 | case BT_LEAD ## n: \ |
michael@0 | 836 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
michael@0 | 837 | *nextTokPtr = ptr; \ |
michael@0 | 838 | return XML_TOK_DATA_CHARS; \ |
michael@0 | 839 | } \ |
michael@0 | 840 | ptr += n; \ |
michael@0 | 841 | break; |
michael@0 | 842 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
michael@0 | 843 | #undef LEAD_CASE |
michael@0 | 844 | case BT_RSQB: |
/* "]" inside a run: consume it when the lookahead proves it is not the
   start of "]]>"; report XML_TOK_INVALID on "]]>" (with *nextTokPtr at the
   ">"); if the input ends before that can be decided, fall through and end
   the run in front of the "]". */
michael@0 | 845 | if (ptr + MINBPC(enc) != end) { |
michael@0 | 846 | if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { |
michael@0 | 847 | ptr += MINBPC(enc); |
michael@0 | 848 | break; |
michael@0 | 849 | } |
michael@0 | 850 | if (ptr + 2*MINBPC(enc) != end) { |
michael@0 | 851 | if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { |
michael@0 | 852 | ptr += MINBPC(enc); |
michael@0 | 853 | break; |
michael@0 | 854 | } |
michael@0 | 855 | *nextTokPtr = ptr + 2*MINBPC(enc); |
michael@0 | 856 | return XML_TOK_INVALID; |
michael@0 | 857 | } |
michael@0 | 858 | } |
michael@0 | 859 | /* fall through */ |
michael@0 | 860 | case BT_AMP: |
michael@0 | 861 | case BT_LT: |
michael@0 | 862 | case BT_NONXML: |
michael@0 | 863 | case BT_MALFORM: |
michael@0 | 864 | case BT_TRAIL: |
michael@0 | 865 | case BT_CR: |
michael@0 | 866 | case BT_LF: |
/* These characters are not part of the run: stop in front of them and let
   the next call handle them in the first switch above. */
michael@0 | 867 | *nextTokPtr = ptr; |
michael@0 | 868 | return XML_TOK_DATA_CHARS; |
michael@0 | 869 | default: |
michael@0 | 870 | ptr += MINBPC(enc); |
michael@0 | 871 | break; |
michael@0 | 872 | } |
michael@0 | 873 | } |
/* Input exhausted: everything scanned is character data. */
michael@0 | 874 | *nextTokPtr = ptr; |
michael@0 | 875 | return XML_TOK_DATA_CHARS; |
michael@0 | 876 | } |
michael@0 | 877 | |
michael@0 | 878 | /* ptr points to character following "%" */ |
michael@0 | 879 | |
michael@0 | 880 | static int PTRCALL |
michael@0 | 881 | PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, |
michael@0 | 882 | const char **nextTokPtr) |
michael@0 | 883 | { |
michael@0 | 884 | if (ptr == end) |
michael@0 | 885 | return -XML_TOK_PERCENT; |
michael@0 | 886 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 887 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 888 | case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: |
michael@0 | 889 | *nextTokPtr = ptr; |
michael@0 | 890 | return XML_TOK_PERCENT; |
michael@0 | 891 | default: |
michael@0 | 892 | *nextTokPtr = ptr; |
michael@0 | 893 | return XML_TOK_INVALID; |
michael@0 | 894 | } |
michael@0 | 895 | while (ptr != end) { |
michael@0 | 896 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 897 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 898 | case BT_SEMI: |
michael@0 | 899 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 900 | return XML_TOK_PARAM_ENTITY_REF; |
michael@0 | 901 | default: |
michael@0 | 902 | *nextTokPtr = ptr; |
michael@0 | 903 | return XML_TOK_INVALID; |
michael@0 | 904 | } |
michael@0 | 905 | } |
michael@0 | 906 | return XML_TOK_PARTIAL; |
michael@0 | 907 | } |
michael@0 | 908 | |
michael@0 | 909 | static int PTRCALL |
michael@0 | 910 | PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, |
michael@0 | 911 | const char **nextTokPtr) |
michael@0 | 912 | { |
michael@0 | 913 | if (ptr == end) |
michael@0 | 914 | return XML_TOK_PARTIAL; |
michael@0 | 915 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 916 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 917 | default: |
michael@0 | 918 | *nextTokPtr = ptr; |
michael@0 | 919 | return XML_TOK_INVALID; |
michael@0 | 920 | } |
michael@0 | 921 | while (ptr != end) { |
michael@0 | 922 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 923 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 924 | case BT_CR: case BT_LF: case BT_S: |
michael@0 | 925 | case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: |
michael@0 | 926 | *nextTokPtr = ptr; |
michael@0 | 927 | return XML_TOK_POUND_NAME; |
michael@0 | 928 | default: |
michael@0 | 929 | *nextTokPtr = ptr; |
michael@0 | 930 | return XML_TOK_INVALID; |
michael@0 | 931 | } |
michael@0 | 932 | } |
michael@0 | 933 | return -XML_TOK_POUND_NAME; |
michael@0 | 934 | } |
michael@0 | 935 | |
michael@0 | 936 | static int PTRCALL |
michael@0 | 937 | PREFIX(scanLit)(int open, const ENCODING *enc, |
michael@0 | 938 | const char *ptr, const char *end, |
michael@0 | 939 | const char **nextTokPtr) |
michael@0 | 940 | { |
michael@0 | 941 | while (ptr != end) { |
michael@0 | 942 | int t = BYTE_TYPE(enc, ptr); |
michael@0 | 943 | switch (t) { |
michael@0 | 944 | INVALID_CASES(ptr, nextTokPtr) |
michael@0 | 945 | case BT_QUOT: |
michael@0 | 946 | case BT_APOS: |
michael@0 | 947 | ptr += MINBPC(enc); |
michael@0 | 948 | if (t != open) |
michael@0 | 949 | break; |
michael@0 | 950 | if (ptr == end) |
michael@0 | 951 | return -XML_TOK_LITERAL; |
michael@0 | 952 | *nextTokPtr = ptr; |
michael@0 | 953 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 954 | case BT_S: case BT_CR: case BT_LF: |
michael@0 | 955 | case BT_GT: case BT_PERCNT: case BT_LSQB: |
michael@0 | 956 | return XML_TOK_LITERAL; |
michael@0 | 957 | default: |
michael@0 | 958 | return XML_TOK_INVALID; |
michael@0 | 959 | } |
michael@0 | 960 | default: |
michael@0 | 961 | ptr += MINBPC(enc); |
michael@0 | 962 | break; |
michael@0 | 963 | } |
michael@0 | 964 | } |
michael@0 | 965 | return XML_TOK_PARTIAL; |
michael@0 | 966 | } |
michael@0 | 967 | |
/* Scans one token of the prolog (markup declarations) from [ptr, end).

   Returns XML_TOK_NONE for an empty range.  Single-character punctuation
   is returned directly; quotes, "<!", "<?", "%" and "#" are delegated to
   the matching scan* helper; anything that can start a name begins a
   name / name-token scan.

   Several results are returned negated (-XML_TOK_PROLOG_S,
   -XML_TOK_CLOSE_BRACKET, -XML_TOK_CLOSE_PAREN, -tok for names): that
   happens when the input ends at a point where the token may or may not
   be complete.
*/
michael@0 | 968 | static int PTRCALL |
michael@0 | 969 | PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, |
michael@0 | 970 | const char **nextTokPtr) |
michael@0 | 971 | { |
/* tok records which kind of name is being scanned in the final loop:
   XML_TOK_NAME, XML_TOK_NMTOKEN or (with XML_NS) XML_TOK_PREFIXED_NAME. */
michael@0 | 972 | int tok; |
michael@0 | 973 | if (ptr == end) |
michael@0 | 974 | return XML_TOK_NONE; |
/* For multi-byte code units, ignore a trailing partial unit by rounding
   the usable length down to a multiple of MINBPC(enc); report
   XML_TOK_PARTIAL if no whole unit remains. */
michael@0 | 975 | if (MINBPC(enc) > 1) { |
michael@0 | 976 | size_t n = end - ptr; |
michael@0 | 977 | if (n & (MINBPC(enc) - 1)) { |
michael@0 | 978 | n &= ~(MINBPC(enc) - 1); |
michael@0 | 979 | if (n == 0) |
michael@0 | 980 | return XML_TOK_PARTIAL; |
michael@0 | 981 | end = ptr + n; |
michael@0 | 982 | } |
michael@0 | 983 | } |
michael@0 | 984 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 985 | case BT_QUOT: |
michael@0 | 986 | return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 987 | case BT_APOS: |
michael@0 | 988 | return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 989 | case BT_LT: |
/* "<" must be followed by "!" (declaration), "?" (processing instruction)
   or a character whose byte type can begin a name.  In the last case the
   token is XML_TOK_INSTANCE_START and *nextTokPtr points back at the "<". */
michael@0 | 990 | { |
michael@0 | 991 | ptr += MINBPC(enc); |
michael@0 | 992 | if (ptr == end) |
michael@0 | 993 | return XML_TOK_PARTIAL; |
michael@0 | 994 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 995 | case BT_EXCL: |
michael@0 | 996 | return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 997 | case BT_QUEST: |
michael@0 | 998 | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 999 | case BT_NMSTRT: |
michael@0 | 1000 | case BT_HEX: |
michael@0 | 1001 | case BT_NONASCII: |
michael@0 | 1002 | case BT_LEAD2: |
michael@0 | 1003 | case BT_LEAD3: |
michael@0 | 1004 | case BT_LEAD4: |
michael@0 | 1005 | *nextTokPtr = ptr - MINBPC(enc); |
michael@0 | 1006 | return XML_TOK_INSTANCE_START; |
michael@0 | 1007 | } |
michael@0 | 1008 | *nextTokPtr = ptr; |
michael@0 | 1009 | return XML_TOK_INVALID; |
michael@0 | 1010 | } |
michael@0 | 1011 | case BT_CR: |
michael@0 | 1012 | if (ptr + MINBPC(enc) == end) { |
michael@0 | 1013 | *nextTokPtr = end; |
michael@0 | 1014 | /* indicate that this might be part of a CR/LF pair */ |
michael@0 | 1015 | return -XML_TOK_PROLOG_S; |
michael@0 | 1016 | } |
michael@0 | 1017 | /* fall through */ |
michael@0 | 1018 | case BT_S: case BT_LF: |
/* Consume a run of characters of type BT_S, BT_LF or BT_CR. */
michael@0 | 1019 | for (;;) { |
michael@0 | 1020 | ptr += MINBPC(enc); |
michael@0 | 1021 | if (ptr == end) |
michael@0 | 1022 | break; |
michael@0 | 1023 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 1024 | case BT_S: case BT_LF: |
michael@0 | 1025 | break; |
michael@0 | 1026 | case BT_CR: |
michael@0 | 1027 | /* don't split CR/LF pair */ |
/* A CR that is the very last character ends the token in front of it, so
   the next call can report it as -XML_TOK_PROLOG_S; any other CR is part
   of the run. */
michael@0 | 1028 | if (ptr + MINBPC(enc) != end) |
michael@0 | 1029 | break; |
michael@0 | 1030 | /* fall through */ |
michael@0 | 1031 | default: |
michael@0 | 1032 | *nextTokPtr = ptr; |
michael@0 | 1033 | return XML_TOK_PROLOG_S; |
michael@0 | 1034 | } |
michael@0 | 1035 | } |
michael@0 | 1036 | *nextTokPtr = ptr; |
michael@0 | 1037 | return XML_TOK_PROLOG_S; |
michael@0 | 1038 | case BT_PERCNT: |
michael@0 | 1039 | return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 1040 | case BT_COMMA: |
michael@0 | 1041 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1042 | return XML_TOK_COMMA; |
michael@0 | 1043 | case BT_LSQB: |
michael@0 | 1044 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1045 | return XML_TOK_OPEN_BRACKET; |
michael@0 | 1046 | case BT_RSQB: |
/* "]" alone is XML_TOK_CLOSE_BRACKET; "]]>" is XML_TOK_COND_SECT_CLOSE.
   If the input ends after "]" the result is -XML_TOK_CLOSE_BRACKET, and if
   it ends after "]]" the result is XML_TOK_PARTIAL. */
michael@0 | 1047 | ptr += MINBPC(enc); |
michael@0 | 1048 | if (ptr == end) |
michael@0 | 1049 | return -XML_TOK_CLOSE_BRACKET; |
michael@0 | 1050 | if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
michael@0 | 1051 | if (ptr + MINBPC(enc) == end) |
michael@0 | 1052 | return XML_TOK_PARTIAL; |
michael@0 | 1053 | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { |
michael@0 | 1054 | *nextTokPtr = ptr + 2*MINBPC(enc); |
michael@0 | 1055 | return XML_TOK_COND_SECT_CLOSE; |
michael@0 | 1056 | } |
michael@0 | 1057 | } |
michael@0 | 1058 | *nextTokPtr = ptr; |
michael@0 | 1059 | return XML_TOK_CLOSE_BRACKET; |
michael@0 | 1060 | case BT_LPAR: |
michael@0 | 1061 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1062 | return XML_TOK_OPEN_PAREN; |
michael@0 | 1063 | case BT_RPAR: |
/* ")" may carry an occurrence indicator ("*", "?" or "+"), which is folded
   into the token.  Otherwise it must be followed by one of the delimiters
   listed below; the delimiter itself is not consumed. */
michael@0 | 1064 | ptr += MINBPC(enc); |
michael@0 | 1065 | if (ptr == end) |
michael@0 | 1066 | return -XML_TOK_CLOSE_PAREN; |
michael@0 | 1067 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 1068 | case BT_AST: |
michael@0 | 1069 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1070 | return XML_TOK_CLOSE_PAREN_ASTERISK; |
michael@0 | 1071 | case BT_QUEST: |
michael@0 | 1072 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1073 | return XML_TOK_CLOSE_PAREN_QUESTION; |
michael@0 | 1074 | case BT_PLUS: |
michael@0 | 1075 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1076 | return XML_TOK_CLOSE_PAREN_PLUS; |
michael@0 | 1077 | case BT_CR: case BT_LF: case BT_S: |
michael@0 | 1078 | case BT_GT: case BT_COMMA: case BT_VERBAR: |
michael@0 | 1079 | case BT_RPAR: |
michael@0 | 1080 | *nextTokPtr = ptr; |
michael@0 | 1081 | return XML_TOK_CLOSE_PAREN; |
michael@0 | 1082 | } |
michael@0 | 1083 | *nextTokPtr = ptr; |
michael@0 | 1084 | return XML_TOK_INVALID; |
michael@0 | 1085 | case BT_VERBAR: |
michael@0 | 1086 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1087 | return XML_TOK_OR; |
michael@0 | 1088 | case BT_GT: |
michael@0 | 1089 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1090 | return XML_TOK_DECL_CLOSE; |
michael@0 | 1091 | case BT_NUM: |
michael@0 | 1092 | return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
/* Multi-byte first character: a name-start character begins an
   XML_TOK_NAME, any other name character begins an XML_TOK_NMTOKEN, and
   everything else is invalid.  A truncated sequence gives
   XML_TOK_PARTIAL_CHAR. */
michael@0 | 1093 | #define LEAD_CASE(n) \ |
michael@0 | 1094 | case BT_LEAD ## n: \ |
michael@0 | 1095 | if (end - ptr < n) \ |
michael@0 | 1096 | return XML_TOK_PARTIAL_CHAR; \ |
michael@0 | 1097 | if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
michael@0 | 1098 | ptr += n; \ |
michael@0 | 1099 | tok = XML_TOK_NAME; \ |
michael@0 | 1100 | break; \ |
michael@0 | 1101 | } \ |
michael@0 | 1102 | if (IS_NAME_CHAR(enc, ptr, n)) { \ |
michael@0 | 1103 | ptr += n; \ |
michael@0 | 1104 | tok = XML_TOK_NMTOKEN; \ |
michael@0 | 1105 | break; \ |
michael@0 | 1106 | } \ |
michael@0 | 1107 | *nextTokPtr = ptr; \ |
michael@0 | 1108 | return XML_TOK_INVALID; |
michael@0 | 1109 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
michael@0 | 1110 | #undef LEAD_CASE |
michael@0 | 1111 | case BT_NMSTRT: |
michael@0 | 1112 | case BT_HEX: |
michael@0 | 1113 | tok = XML_TOK_NAME; |
michael@0 | 1114 | ptr += MINBPC(enc); |
michael@0 | 1115 | break; |
michael@0 | 1116 | case BT_DIGIT: |
michael@0 | 1117 | case BT_NAME: |
michael@0 | 1118 | case BT_MINUS: |
michael@0 | 1119 | #ifdef XML_NS |
michael@0 | 1120 | case BT_COLON: |
michael@0 | 1121 | #endif |
michael@0 | 1122 | tok = XML_TOK_NMTOKEN; |
michael@0 | 1123 | ptr += MINBPC(enc); |
michael@0 | 1124 | break; |
michael@0 | 1125 | case BT_NONASCII: |
michael@0 | 1126 | if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { |
michael@0 | 1127 | ptr += MINBPC(enc); |
michael@0 | 1128 | tok = XML_TOK_NAME; |
michael@0 | 1129 | break; |
michael@0 | 1130 | } |
michael@0 | 1131 | if (IS_NAME_CHAR_MINBPC(enc, ptr)) { |
michael@0 | 1132 | ptr += MINBPC(enc); |
michael@0 | 1133 | tok = XML_TOK_NMTOKEN; |
michael@0 | 1134 | break; |
michael@0 | 1135 | } |
michael@0 | 1136 | /* fall through */ |
michael@0 | 1137 | default: |
michael@0 | 1138 | *nextTokPtr = ptr; |
michael@0 | 1139 | return XML_TOK_INVALID; |
michael@0 | 1140 | } |
/* Only the name / name-token cases reach this point.  Consume the rest of
   the name; the character that ends it decides the final token. */
michael@0 | 1141 | while (ptr != end) { |
michael@0 | 1142 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 1143 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 1144 | case BT_GT: case BT_RPAR: case BT_COMMA: |
michael@0 | 1145 | case BT_VERBAR: case BT_LSQB: case BT_PERCNT: |
michael@0 | 1146 | case BT_S: case BT_CR: case BT_LF: |
michael@0 | 1147 | *nextTokPtr = ptr; |
michael@0 | 1148 | return tok; |
michael@0 | 1149 | #ifdef XML_NS |
michael@0 | 1150 | case BT_COLON: |
/* Namespace-aware scanning: the first colon inside a NAME turns it into a
   PREFIXED_NAME provided a name character follows; otherwise, and on any
   further colon, the token degrades to NMTOKEN.  In the inner "default"
   ptr is not advanced, so the character after the colon is examined again
   by the outer loop. */
michael@0 | 1151 | ptr += MINBPC(enc); |
michael@0 | 1152 | switch (tok) { |
michael@0 | 1153 | case XML_TOK_NAME: |
michael@0 | 1154 | if (ptr == end) |
michael@0 | 1155 | return XML_TOK_PARTIAL; |
michael@0 | 1156 | tok = XML_TOK_PREFIXED_NAME; |
michael@0 | 1157 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 1158 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
michael@0 | 1159 | default: |
michael@0 | 1160 | tok = XML_TOK_NMTOKEN; |
michael@0 | 1161 | break; |
michael@0 | 1162 | } |
michael@0 | 1163 | break; |
michael@0 | 1164 | case XML_TOK_PREFIXED_NAME: |
michael@0 | 1165 | tok = XML_TOK_NMTOKEN; |
michael@0 | 1166 | break; |
michael@0 | 1167 | } |
michael@0 | 1168 | break; |
michael@0 | 1169 | #endif |
/* An occurrence indicator directly after a name is folded into the token;
   after an NMTOKEN it is an error. */
michael@0 | 1170 | case BT_PLUS: |
michael@0 | 1171 | if (tok == XML_TOK_NMTOKEN) { |
michael@0 | 1172 | *nextTokPtr = ptr; |
michael@0 | 1173 | return XML_TOK_INVALID; |
michael@0 | 1174 | } |
michael@0 | 1175 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1176 | return XML_TOK_NAME_PLUS; |
michael@0 | 1177 | case BT_AST: |
michael@0 | 1178 | if (tok == XML_TOK_NMTOKEN) { |
michael@0 | 1179 | *nextTokPtr = ptr; |
michael@0 | 1180 | return XML_TOK_INVALID; |
michael@0 | 1181 | } |
michael@0 | 1182 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1183 | return XML_TOK_NAME_ASTERISK; |
michael@0 | 1184 | case BT_QUEST: |
michael@0 | 1185 | if (tok == XML_TOK_NMTOKEN) { |
michael@0 | 1186 | *nextTokPtr = ptr; |
michael@0 | 1187 | return XML_TOK_INVALID; |
michael@0 | 1188 | } |
michael@0 | 1189 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1190 | return XML_TOK_NAME_QUESTION; |
michael@0 | 1191 | default: |
michael@0 | 1192 | *nextTokPtr = ptr; |
michael@0 | 1193 | return XML_TOK_INVALID; |
michael@0 | 1194 | } |
michael@0 | 1195 | } |
/* The input ended while still inside the name: return the token negated. */
michael@0 | 1196 | return -tok; |
michael@0 | 1197 | } |
michael@0 | 1198 | |
michael@0 | 1199 | static int PTRCALL |
michael@0 | 1200 | PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, |
michael@0 | 1201 | const char *end, const char **nextTokPtr) |
michael@0 | 1202 | { |
michael@0 | 1203 | const char *start; |
michael@0 | 1204 | if (ptr == end) |
michael@0 | 1205 | return XML_TOK_NONE; |
michael@0 | 1206 | start = ptr; |
michael@0 | 1207 | while (ptr != end) { |
michael@0 | 1208 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 1209 | #define LEAD_CASE(n) \ |
michael@0 | 1210 | case BT_LEAD ## n: ptr += n; break; |
michael@0 | 1211 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
michael@0 | 1212 | #undef LEAD_CASE |
michael@0 | 1213 | case BT_AMP: |
michael@0 | 1214 | if (ptr == start) |
michael@0 | 1215 | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 1216 | *nextTokPtr = ptr; |
michael@0 | 1217 | return XML_TOK_DATA_CHARS; |
michael@0 | 1218 | case BT_LT: |
michael@0 | 1219 | /* this is for inside entity references */ |
michael@0 | 1220 | *nextTokPtr = ptr; |
michael@0 | 1221 | return XML_TOK_INVALID; |
michael@0 | 1222 | case BT_LF: |
michael@0 | 1223 | if (ptr == start) { |
michael@0 | 1224 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1225 | return XML_TOK_DATA_NEWLINE; |
michael@0 | 1226 | } |
michael@0 | 1227 | *nextTokPtr = ptr; |
michael@0 | 1228 | return XML_TOK_DATA_CHARS; |
michael@0 | 1229 | case BT_CR: |
michael@0 | 1230 | if (ptr == start) { |
michael@0 | 1231 | ptr += MINBPC(enc); |
michael@0 | 1232 | if (ptr == end) |
michael@0 | 1233 | return XML_TOK_TRAILING_CR; |
michael@0 | 1234 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
michael@0 | 1235 | ptr += MINBPC(enc); |
michael@0 | 1236 | *nextTokPtr = ptr; |
michael@0 | 1237 | return XML_TOK_DATA_NEWLINE; |
michael@0 | 1238 | } |
michael@0 | 1239 | *nextTokPtr = ptr; |
michael@0 | 1240 | return XML_TOK_DATA_CHARS; |
michael@0 | 1241 | case BT_S: |
michael@0 | 1242 | if (ptr == start) { |
michael@0 | 1243 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1244 | return XML_TOK_ATTRIBUTE_VALUE_S; |
michael@0 | 1245 | } |
michael@0 | 1246 | *nextTokPtr = ptr; |
michael@0 | 1247 | return XML_TOK_DATA_CHARS; |
michael@0 | 1248 | default: |
michael@0 | 1249 | ptr += MINBPC(enc); |
michael@0 | 1250 | break; |
michael@0 | 1251 | } |
michael@0 | 1252 | } |
michael@0 | 1253 | *nextTokPtr = ptr; |
michael@0 | 1254 | return XML_TOK_DATA_CHARS; |
michael@0 | 1255 | } |
michael@0 | 1256 | |
michael@0 | 1257 | static int PTRCALL |
michael@0 | 1258 | PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, |
michael@0 | 1259 | const char *end, const char **nextTokPtr) |
michael@0 | 1260 | { |
michael@0 | 1261 | const char *start; |
michael@0 | 1262 | if (ptr == end) |
michael@0 | 1263 | return XML_TOK_NONE; |
michael@0 | 1264 | start = ptr; |
michael@0 | 1265 | while (ptr != end) { |
michael@0 | 1266 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 1267 | #define LEAD_CASE(n) \ |
michael@0 | 1268 | case BT_LEAD ## n: ptr += n; break; |
michael@0 | 1269 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
michael@0 | 1270 | #undef LEAD_CASE |
michael@0 | 1271 | case BT_AMP: |
michael@0 | 1272 | if (ptr == start) |
michael@0 | 1273 | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
michael@0 | 1274 | *nextTokPtr = ptr; |
michael@0 | 1275 | return XML_TOK_DATA_CHARS; |
michael@0 | 1276 | case BT_PERCNT: |
michael@0 | 1277 | if (ptr == start) { |
michael@0 | 1278 | int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), |
michael@0 | 1279 | end, nextTokPtr); |
michael@0 | 1280 | return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; |
michael@0 | 1281 | } |
michael@0 | 1282 | *nextTokPtr = ptr; |
michael@0 | 1283 | return XML_TOK_DATA_CHARS; |
michael@0 | 1284 | case BT_LF: |
michael@0 | 1285 | if (ptr == start) { |
michael@0 | 1286 | *nextTokPtr = ptr + MINBPC(enc); |
michael@0 | 1287 | return XML_TOK_DATA_NEWLINE; |
michael@0 | 1288 | } |
michael@0 | 1289 | *nextTokPtr = ptr; |
michael@0 | 1290 | return XML_TOK_DATA_CHARS; |
michael@0 | 1291 | case BT_CR: |
michael@0 | 1292 | if (ptr == start) { |
michael@0 | 1293 | ptr += MINBPC(enc); |
michael@0 | 1294 | if (ptr == end) |
michael@0 | 1295 | return XML_TOK_TRAILING_CR; |
michael@0 | 1296 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
michael@0 | 1297 | ptr += MINBPC(enc); |
michael@0 | 1298 | *nextTokPtr = ptr; |
michael@0 | 1299 | return XML_TOK_DATA_NEWLINE; |
michael@0 | 1300 | } |
michael@0 | 1301 | *nextTokPtr = ptr; |
michael@0 | 1302 | return XML_TOK_DATA_CHARS; |
michael@0 | 1303 | default: |
michael@0 | 1304 | ptr += MINBPC(enc); |
michael@0 | 1305 | break; |
michael@0 | 1306 | } |
michael@0 | 1307 | } |
michael@0 | 1308 | *nextTokPtr = ptr; |
michael@0 | 1309 | return XML_TOK_DATA_CHARS; |
michael@0 | 1310 | } |
michael@0 | 1311 | |
#ifdef XML_DTD

/* Scans forward through an ignored conditional section.

   Every "<![" met on the way opens one more nesting level and every "]]>"
   closes one.  The "]]>" seen while the nesting depth is zero ends the
   section: XML_TOK_IGNORE_SECT is returned with *nextTokPtr just past it.
   Returns XML_TOK_PARTIAL when the input runs out first, and
   XML_TOK_INVALID (through INVALID_CASES) on an invalid character.
*/
static int PTRCALL
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
                         const char *end, const char **nextTokPtr)
{
  int depth = 0;

  /* For multi-byte code units, drop a trailing partial unit from the range. */
  if (MINBPC(enc) > 1) {
    size_t avail = end - ptr;
    if (avail & (MINBPC(enc) - 1)) {
      avail &= ~(MINBPC(enc) - 1);
      end = ptr + avail;
    }
  }

  while (ptr != end) {
    switch (BYTE_TYPE(enc, ptr)) {
    INVALID_CASES(ptr, nextTokPtr)
    case BT_LT:
      /* Look for "<![".  A character that breaks the pattern is left
         unconsumed and examined again by the loop. */
      ptr += MINBPC(enc);
      if (ptr == end) {
        return XML_TOK_PARTIAL;
      }
      if (!CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
        break;
      }
      ptr += MINBPC(enc);
      if (ptr == end) {
        return XML_TOK_PARTIAL;
      }
      if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
        ++depth;
        ptr += MINBPC(enc);
      }
      break;
    case BT_RSQB:
      /* Look for "]]>". */
      ptr += MINBPC(enc);
      if (ptr == end) {
        return XML_TOK_PARTIAL;
      }
      if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
        break;
      }
      ptr += MINBPC(enc);
      if (ptr == end) {
        return XML_TOK_PARTIAL;
      }
      if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
        break;
      }
      ptr += MINBPC(enc);
      if (depth == 0) {
        *nextTokPtr = ptr;
        return XML_TOK_IGNORE_SECT;
      }
      --depth;
      break;
    default:
      ptr += MINBPC(enc);
      break;
    }
  }
  return XML_TOK_PARTIAL;
}

#endif /* XML_DTD */
michael@0 | 1366 | |
michael@0 | 1367 | static int PTRCALL |
michael@0 | 1368 | PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, |
michael@0 | 1369 | const char **badPtr) |
michael@0 | 1370 | { |
michael@0 | 1371 | ptr += MINBPC(enc); |
michael@0 | 1372 | end -= MINBPC(enc); |
michael@0 | 1373 | for (; ptr != end; ptr += MINBPC(enc)) { |
michael@0 | 1374 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 1375 | case BT_DIGIT: |
michael@0 | 1376 | case BT_HEX: |
michael@0 | 1377 | case BT_MINUS: |
michael@0 | 1378 | case BT_APOS: |
michael@0 | 1379 | case BT_LPAR: |
michael@0 | 1380 | case BT_RPAR: |
michael@0 | 1381 | case BT_PLUS: |
michael@0 | 1382 | case BT_COMMA: |
michael@0 | 1383 | case BT_SOL: |
michael@0 | 1384 | case BT_EQUALS: |
michael@0 | 1385 | case BT_QUEST: |
michael@0 | 1386 | case BT_CR: |
michael@0 | 1387 | case BT_LF: |
michael@0 | 1388 | case BT_SEMI: |
michael@0 | 1389 | case BT_EXCL: |
michael@0 | 1390 | case BT_AST: |
michael@0 | 1391 | case BT_PERCNT: |
michael@0 | 1392 | case BT_NUM: |
michael@0 | 1393 | #ifdef XML_NS |
michael@0 | 1394 | case BT_COLON: |
michael@0 | 1395 | #endif |
michael@0 | 1396 | break; |
michael@0 | 1397 | case BT_S: |
michael@0 | 1398 | if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { |
michael@0 | 1399 | *badPtr = ptr; |
michael@0 | 1400 | return 0; |
michael@0 | 1401 | } |
michael@0 | 1402 | break; |
michael@0 | 1403 | case BT_NAME: |
michael@0 | 1404 | case BT_NMSTRT: |
michael@0 | 1405 | if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) |
michael@0 | 1406 | break; |
michael@0 | 1407 | default: |
michael@0 | 1408 | switch (BYTE_TO_ASCII(enc, ptr)) { |
michael@0 | 1409 | case 0x24: /* $ */ |
michael@0 | 1410 | case 0x40: /* @ */ |
michael@0 | 1411 | break; |
michael@0 | 1412 | default: |
michael@0 | 1413 | *badPtr = ptr; |
michael@0 | 1414 | return 0; |
michael@0 | 1415 | } |
michael@0 | 1416 | break; |
michael@0 | 1417 | } |
michael@0 | 1418 | } |
michael@0 | 1419 | return 1; |
michael@0 | 1420 | } |
michael@0 | 1421 | |
michael@0 | 1422 | /* This must only be called for a well-formed start-tag or empty |
michael@0 | 1423 | element tag. Returns the number of attributes. Pointers to the |
michael@0 | 1424 | first attsMax attributes are stored in atts. |
michael@0 | 1425 | */ |
/* The scan has no end pointer: it stops at the first ">" or "/" found
   outside an attribute value, which is why the tag must be well-formed.
   Attributes beyond attsMax are still counted in the return value but
   nothing is stored for them.

   For each stored attribute the fields name, valuePtr, valueEnd and
   normalized are filled in.  normalized starts at 1 and is cleared by any
   "&", by a CR or LF inside the value, and by a BT_S character inside the
   value that is not a single interior space (see the BT_S case).
*/
michael@0 | 1426 | |
michael@0 | 1427 | static int PTRCALL |
michael@0 | 1428 | PREFIX(getAtts)(const ENCODING *enc, const char *ptr, |
michael@0 | 1429 | int attsMax, ATTRIBUTE *atts) |
michael@0 | 1430 | { |
/* The scan starts in state inName, so the first name met (before any
   white space) is not recorded as an attribute name. */
michael@0 | 1431 | enum { other, inName, inValue } state = inName; |
michael@0 | 1432 | int nAtts = 0; |
michael@0 | 1433 | int open = 0; /* defined when state == inValue; |
michael@0 | 1434 | initialization just to shut up compilers */ |
michael@0 | 1435 | |
/* The first character at ptr is skipped; after that the loop advances one
   minimum-size character per iteration. */
michael@0 | 1436 | for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { |
michael@0 | 1437 | switch (BYTE_TYPE(enc, ptr)) { |
/* START_NAME: a name character met in state "other" begins a new attribute
   name; record it (if there is room) and mark the value as normalized until
   proven otherwise. */
michael@0 | 1438 | #define START_NAME \ |
michael@0 | 1439 | if (state == other) { \ |
michael@0 | 1440 | if (nAtts < attsMax) { \ |
michael@0 | 1441 | atts[nAtts].name = ptr; \ |
michael@0 | 1442 | atts[nAtts].normalized = 1; \ |
michael@0 | 1443 | } \ |
michael@0 | 1444 | state = inName; \ |
michael@0 | 1445 | } |
/* LEAD_CASE: for an n-byte character, advance by n - MINBPC(enc) here; the
   loop increment supplies the remaining MINBPC(enc) bytes. */
michael@0 | 1446 | #define LEAD_CASE(n) \ |
michael@0 | 1447 | case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; |
michael@0 | 1448 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
michael@0 | 1449 | #undef LEAD_CASE |
michael@0 | 1450 | case BT_NONASCII: |
michael@0 | 1451 | case BT_NMSTRT: |
michael@0 | 1452 | case BT_HEX: |
michael@0 | 1453 | START_NAME |
michael@0 | 1454 | break; |
michael@0 | 1455 | #undef START_NAME |
michael@0 | 1456 | case BT_QUOT: |
/* A double quote outside a value opens one: the value starts just after
   it.  Inside a value it closes the value only if the value was opened
   with the same kind of quote; the attribute is then complete and counted. */
michael@0 | 1457 | if (state != inValue) { |
michael@0 | 1458 | if (nAtts < attsMax) |
michael@0 | 1459 | atts[nAtts].valuePtr = ptr + MINBPC(enc); |
michael@0 | 1460 | state = inValue; |
michael@0 | 1461 | open = BT_QUOT; |
michael@0 | 1462 | } |
michael@0 | 1463 | else if (open == BT_QUOT) { |
michael@0 | 1464 | state = other; |
michael@0 | 1465 | if (nAtts < attsMax) |
michael@0 | 1466 | atts[nAtts].valueEnd = ptr; |
michael@0 | 1467 | nAtts++; |
michael@0 | 1468 | } |
michael@0 | 1469 | break; |
michael@0 | 1470 | case BT_APOS: |
/* Same handling as BT_QUOT, for values delimited by apostrophes. */
michael@0 | 1471 | if (state != inValue) { |
michael@0 | 1472 | if (nAtts < attsMax) |
michael@0 | 1473 | atts[nAtts].valuePtr = ptr + MINBPC(enc); |
michael@0 | 1474 | state = inValue; |
michael@0 | 1475 | open = BT_APOS; |
michael@0 | 1476 | } |
michael@0 | 1477 | else if (open == BT_APOS) { |
michael@0 | 1478 | state = other; |
michael@0 | 1479 | if (nAtts < attsMax) |
michael@0 | 1480 | atts[nAtts].valueEnd = ptr; |
michael@0 | 1481 | nAtts++; |
michael@0 | 1482 | } |
michael@0 | 1483 | break; |
michael@0 | 1484 | case BT_AMP: |
/* Any "&" clears the normalized flag of the current attribute slot. */
michael@0 | 1485 | if (nAtts < attsMax) |
michael@0 | 1486 | atts[nAtts].normalized = 0; |
michael@0 | 1487 | break; |
michael@0 | 1488 | case BT_S: |
/* Outside a value, a BT_S character ends the current name.  Inside a value
   that is still flagged normalized, the flag is cleared when this character
   is the first one of the value, is not a plain space, is followed by
   another space, or is followed by the closing quote. */
michael@0 | 1489 | if (state == inName) |
michael@0 | 1490 | state = other; |
michael@0 | 1491 | else if (state == inValue |
michael@0 | 1492 | && nAtts < attsMax |
michael@0 | 1493 | && atts[nAtts].normalized |
michael@0 | 1494 | && (ptr == atts[nAtts].valuePtr |
michael@0 | 1495 | || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE |
michael@0 | 1496 | || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE |
michael@0 | 1497 | || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) |
michael@0 | 1498 | atts[nAtts].normalized = 0; |
michael@0 | 1499 | break; |
michael@0 | 1500 | case BT_CR: case BT_LF: |
michael@0 | 1501 | /* This case ensures that the first attribute name is counted. |
michael@0 | 1502 | Apart from that we could just change state on the quote. */ |
michael@0 | 1503 | if (state == inName) |
michael@0 | 1504 | state = other; |
michael@0 | 1505 | else if (state == inValue && nAtts < attsMax) |
michael@0 | 1506 | atts[nAtts].normalized = 0; |
michael@0 | 1507 | break; |
michael@0 | 1508 | case BT_GT: |
michael@0 | 1509 | case BT_SOL: |
/* ">" or "/" outside a value ends the scan; inside a value they are
   ordinary characters. */
michael@0 | 1510 | if (state != inValue) |
michael@0 | 1511 | return nAtts; |
michael@0 | 1512 | break; |
michael@0 | 1513 | default: |
michael@0 | 1514 | break; |
michael@0 | 1515 | } |
michael@0 | 1516 | } |
michael@0 | 1517 | /* not reached */ |
michael@0 | 1518 | } |
michael@0 | 1519 | |
michael@0 | 1520 | static int PTRFASTCALL |
michael@0 | 1521 | PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) |
michael@0 | 1522 | { |
michael@0 | 1523 | int result = 0; |
michael@0 | 1524 | /* skip &# */ |
michael@0 | 1525 | ptr += 2*MINBPC(enc); |
michael@0 | 1526 | if (CHAR_MATCHES(enc, ptr, ASCII_x)) { |
michael@0 | 1527 | for (ptr += MINBPC(enc); |
michael@0 | 1528 | !CHAR_MATCHES(enc, ptr, ASCII_SEMI); |
michael@0 | 1529 | ptr += MINBPC(enc)) { |
michael@0 | 1530 | int c = BYTE_TO_ASCII(enc, ptr); |
michael@0 | 1531 | switch (c) { |
michael@0 | 1532 | case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: |
michael@0 | 1533 | case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: |
michael@0 | 1534 | result <<= 4; |
michael@0 | 1535 | result |= (c - ASCII_0); |
michael@0 | 1536 | break; |
michael@0 | 1537 | case ASCII_A: case ASCII_B: case ASCII_C: |
michael@0 | 1538 | case ASCII_D: case ASCII_E: case ASCII_F: |
michael@0 | 1539 | result <<= 4; |
michael@0 | 1540 | result += 10 + (c - ASCII_A); |
michael@0 | 1541 | break; |
michael@0 | 1542 | case ASCII_a: case ASCII_b: case ASCII_c: |
michael@0 | 1543 | case ASCII_d: case ASCII_e: case ASCII_f: |
michael@0 | 1544 | result <<= 4; |
michael@0 | 1545 | result += 10 + (c - ASCII_a); |
michael@0 | 1546 | break; |
michael@0 | 1547 | } |
michael@0 | 1548 | if (result >= 0x110000) |
michael@0 | 1549 | return -1; |
michael@0 | 1550 | } |
michael@0 | 1551 | } |
michael@0 | 1552 | else { |
michael@0 | 1553 | for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { |
michael@0 | 1554 | int c = BYTE_TO_ASCII(enc, ptr); |
michael@0 | 1555 | result *= 10; |
michael@0 | 1556 | result += (c - ASCII_0); |
michael@0 | 1557 | if (result >= 0x110000) |
michael@0 | 1558 | return -1; |
michael@0 | 1559 | } |
michael@0 | 1560 | } |
michael@0 | 1561 | return checkCharRefNumber(result); |
michael@0 | 1562 | } |
michael@0 | 1563 | |
michael@0 | 1564 | static int PTRCALL |
michael@0 | 1565 | PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, |
michael@0 | 1566 | const char *end) |
michael@0 | 1567 | { |
michael@0 | 1568 | switch ((end - ptr)/MINBPC(enc)) { |
michael@0 | 1569 | case 2: |
michael@0 | 1570 | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { |
michael@0 | 1571 | switch (BYTE_TO_ASCII(enc, ptr)) { |
michael@0 | 1572 | case ASCII_l: |
michael@0 | 1573 | return ASCII_LT; |
michael@0 | 1574 | case ASCII_g: |
michael@0 | 1575 | return ASCII_GT; |
michael@0 | 1576 | } |
michael@0 | 1577 | } |
michael@0 | 1578 | break; |
michael@0 | 1579 | case 3: |
michael@0 | 1580 | if (CHAR_MATCHES(enc, ptr, ASCII_a)) { |
michael@0 | 1581 | ptr += MINBPC(enc); |
michael@0 | 1582 | if (CHAR_MATCHES(enc, ptr, ASCII_m)) { |
michael@0 | 1583 | ptr += MINBPC(enc); |
michael@0 | 1584 | if (CHAR_MATCHES(enc, ptr, ASCII_p)) |
michael@0 | 1585 | return ASCII_AMP; |
michael@0 | 1586 | } |
michael@0 | 1587 | } |
michael@0 | 1588 | break; |
michael@0 | 1589 | case 4: |
michael@0 | 1590 | switch (BYTE_TO_ASCII(enc, ptr)) { |
michael@0 | 1591 | case ASCII_q: |
michael@0 | 1592 | ptr += MINBPC(enc); |
michael@0 | 1593 | if (CHAR_MATCHES(enc, ptr, ASCII_u)) { |
michael@0 | 1594 | ptr += MINBPC(enc); |
michael@0 | 1595 | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
michael@0 | 1596 | ptr += MINBPC(enc); |
michael@0 | 1597 | if (CHAR_MATCHES(enc, ptr, ASCII_t)) |
michael@0 | 1598 | return ASCII_QUOT; |
michael@0 | 1599 | } |
michael@0 | 1600 | } |
michael@0 | 1601 | break; |
michael@0 | 1602 | case ASCII_a: |
michael@0 | 1603 | ptr += MINBPC(enc); |
michael@0 | 1604 | if (CHAR_MATCHES(enc, ptr, ASCII_p)) { |
michael@0 | 1605 | ptr += MINBPC(enc); |
michael@0 | 1606 | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
michael@0 | 1607 | ptr += MINBPC(enc); |
michael@0 | 1608 | if (CHAR_MATCHES(enc, ptr, ASCII_s)) |
michael@0 | 1609 | return ASCII_APOS; |
michael@0 | 1610 | } |
michael@0 | 1611 | } |
michael@0 | 1612 | break; |
michael@0 | 1613 | } |
michael@0 | 1614 | } |
michael@0 | 1615 | return 0; |
michael@0 | 1616 | } |
michael@0 | 1617 | |
michael@0 | 1618 | static int PTRCALL |
michael@0 | 1619 | PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) |
michael@0 | 1620 | { |
michael@0 | 1621 | for (;;) { |
michael@0 | 1622 | switch (BYTE_TYPE(enc, ptr1)) { |
michael@0 | 1623 | #define LEAD_CASE(n) \ |
michael@0 | 1624 | case BT_LEAD ## n: \ |
michael@0 | 1625 | if (*ptr1++ != *ptr2++) \ |
michael@0 | 1626 | return 0; |
michael@0 | 1627 | LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) |
michael@0 | 1628 | #undef LEAD_CASE |
michael@0 | 1629 | /* fall through */ |
michael@0 | 1630 | if (*ptr1++ != *ptr2++) |
michael@0 | 1631 | return 0; |
michael@0 | 1632 | break; |
michael@0 | 1633 | case BT_NONASCII: |
michael@0 | 1634 | case BT_NMSTRT: |
michael@0 | 1635 | #ifdef XML_NS |
michael@0 | 1636 | case BT_COLON: |
michael@0 | 1637 | #endif |
michael@0 | 1638 | case BT_HEX: |
michael@0 | 1639 | case BT_DIGIT: |
michael@0 | 1640 | case BT_NAME: |
michael@0 | 1641 | case BT_MINUS: |
michael@0 | 1642 | if (*ptr2++ != *ptr1++) |
michael@0 | 1643 | return 0; |
michael@0 | 1644 | if (MINBPC(enc) > 1) { |
michael@0 | 1645 | if (*ptr2++ != *ptr1++) |
michael@0 | 1646 | return 0; |
michael@0 | 1647 | if (MINBPC(enc) > 2) { |
michael@0 | 1648 | if (*ptr2++ != *ptr1++) |
michael@0 | 1649 | return 0; |
michael@0 | 1650 | if (MINBPC(enc) > 3) { |
michael@0 | 1651 | if (*ptr2++ != *ptr1++) |
michael@0 | 1652 | return 0; |
michael@0 | 1653 | } |
michael@0 | 1654 | } |
michael@0 | 1655 | } |
michael@0 | 1656 | break; |
michael@0 | 1657 | default: |
michael@0 | 1658 | if (MINBPC(enc) == 1 && *ptr1 == *ptr2) |
michael@0 | 1659 | return 1; |
michael@0 | 1660 | switch (BYTE_TYPE(enc, ptr2)) { |
michael@0 | 1661 | case BT_LEAD2: |
michael@0 | 1662 | case BT_LEAD3: |
michael@0 | 1663 | case BT_LEAD4: |
michael@0 | 1664 | case BT_NONASCII: |
michael@0 | 1665 | case BT_NMSTRT: |
michael@0 | 1666 | #ifdef XML_NS |
michael@0 | 1667 | case BT_COLON: |
michael@0 | 1668 | #endif |
michael@0 | 1669 | case BT_HEX: |
michael@0 | 1670 | case BT_DIGIT: |
michael@0 | 1671 | case BT_NAME: |
michael@0 | 1672 | case BT_MINUS: |
michael@0 | 1673 | return 0; |
michael@0 | 1674 | default: |
michael@0 | 1675 | return 1; |
michael@0 | 1676 | } |
michael@0 | 1677 | } |
michael@0 | 1678 | } |
michael@0 | 1679 | /* not reached */ |
michael@0 | 1680 | } |
michael@0 | 1681 | |
michael@0 | 1682 | static int PTRCALL |
michael@0 | 1683 | PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, |
michael@0 | 1684 | const char *end1, const char *ptr2) |
michael@0 | 1685 | { |
michael@0 | 1686 | for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { |
michael@0 | 1687 | if (ptr1 == end1) |
michael@0 | 1688 | return 0; |
michael@0 | 1689 | if (!CHAR_MATCHES(enc, ptr1, *ptr2)) |
michael@0 | 1690 | return 0; |
michael@0 | 1691 | } |
michael@0 | 1692 | return ptr1 == end1; |
michael@0 | 1693 | } |
michael@0 | 1694 | |
michael@0 | 1695 | static int PTRFASTCALL |
michael@0 | 1696 | PREFIX(nameLength)(const ENCODING *enc, const char *ptr) |
michael@0 | 1697 | { |
michael@0 | 1698 | const char *start = ptr; |
michael@0 | 1699 | for (;;) { |
michael@0 | 1700 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 1701 | #define LEAD_CASE(n) \ |
michael@0 | 1702 | case BT_LEAD ## n: ptr += n; break; |
michael@0 | 1703 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
michael@0 | 1704 | #undef LEAD_CASE |
michael@0 | 1705 | case BT_NONASCII: |
michael@0 | 1706 | case BT_NMSTRT: |
michael@0 | 1707 | #ifdef XML_NS |
michael@0 | 1708 | case BT_COLON: |
michael@0 | 1709 | #endif |
michael@0 | 1710 | case BT_HEX: |
michael@0 | 1711 | case BT_DIGIT: |
michael@0 | 1712 | case BT_NAME: |
michael@0 | 1713 | case BT_MINUS: |
michael@0 | 1714 | ptr += MINBPC(enc); |
michael@0 | 1715 | break; |
michael@0 | 1716 | default: |
michael@0 | 1717 | return (int)(ptr - start); |
michael@0 | 1718 | } |
michael@0 | 1719 | } |
michael@0 | 1720 | } |
michael@0 | 1721 | |
michael@0 | 1722 | static const char * PTRFASTCALL |
michael@0 | 1723 | PREFIX(skipS)(const ENCODING *enc, const char *ptr) |
michael@0 | 1724 | { |
michael@0 | 1725 | for (;;) { |
michael@0 | 1726 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 1727 | case BT_LF: |
michael@0 | 1728 | case BT_CR: |
michael@0 | 1729 | case BT_S: |
michael@0 | 1730 | ptr += MINBPC(enc); |
michael@0 | 1731 | break; |
michael@0 | 1732 | default: |
michael@0 | 1733 | return ptr; |
michael@0 | 1734 | } |
michael@0 | 1735 | } |
michael@0 | 1736 | } |
michael@0 | 1737 | |
michael@0 | 1738 | static void PTRCALL |
michael@0 | 1739 | PREFIX(updatePosition)(const ENCODING *enc, |
michael@0 | 1740 | const char *ptr, |
michael@0 | 1741 | const char *end, |
michael@0 | 1742 | POSITION *pos) |
michael@0 | 1743 | { |
michael@0 | 1744 | while (ptr != end) { |
michael@0 | 1745 | switch (BYTE_TYPE(enc, ptr)) { |
michael@0 | 1746 | #define LEAD_CASE(n) \ |
michael@0 | 1747 | case BT_LEAD ## n: \ |
michael@0 | 1748 | ptr += n; \ |
michael@0 | 1749 | break; |
michael@0 | 1750 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
michael@0 | 1751 | #undef LEAD_CASE |
michael@0 | 1752 | case BT_LF: |
michael@0 | 1753 | pos->columnNumber = (XML_Size)-1; |
michael@0 | 1754 | pos->lineNumber++; |
michael@0 | 1755 | ptr += MINBPC(enc); |
michael@0 | 1756 | break; |
michael@0 | 1757 | case BT_CR: |
michael@0 | 1758 | pos->lineNumber++; |
michael@0 | 1759 | ptr += MINBPC(enc); |
michael@0 | 1760 | if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) |
michael@0 | 1761 | ptr += MINBPC(enc); |
michael@0 | 1762 | pos->columnNumber = (XML_Size)-1; |
michael@0 | 1763 | break; |
michael@0 | 1764 | default: |
michael@0 | 1765 | ptr += MINBPC(enc); |
michael@0 | 1766 | break; |
michael@0 | 1767 | } |
michael@0 | 1768 | pos->columnNumber++; |
michael@0 | 1769 | } |
michael@0 | 1770 | } |
michael@0 | 1771 | |
michael@0 | 1772 | #undef DO_LEAD_CASE |
michael@0 | 1773 | #undef MULTIBYTE_CASES |
michael@0 | 1774 | #undef INVALID_CASES |
michael@0 | 1775 | #undef CHECK_NAME_CASE |
michael@0 | 1776 | #undef CHECK_NAME_CASES |
michael@0 | 1777 | #undef CHECK_NMSTRT_CASE |
michael@0 | 1778 | #undef CHECK_NMSTRT_CASES |
michael@0 | 1779 |