/* Test whether the UCS-2 character with high byte `hi` and low byte `lo`
   has its bit set in namingBitmap.  pages[hi] selects a group of eight
   bitmap words, the top 3 bits of `lo` select the word inside that group
   and the bottom 5 bits of `lo` select the bit inside the word.
   Evaluates to nonzero when the bit is set.

   The mask is built from an unsigned 1: shifting a signed 1 left by 31
   (lo & 0x1F == 31) would overflow int, which is undefined behaviour.
   `pages` is parenthesized so that pointer expressions may be passed.
*/
#define UCS2_GET_NAMING(pages, hi, lo) \
   (namingBitmap[((pages)[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits between
   the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
   pages, 3 bits to add to that index and 5 bits to generate the mask.
   `byte` must point at unsigned char.  The mask uses an unsigned 1 so
   that selecting bit 31 is not a signed-overflow shift.
*/
#define UTF8_GET_NAMING2(pages, byte) \
    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                      + ((((byte)[0]) & 3) << 1) \
                      + ((((byte)[1]) >> 5) & 1)] \
         & (1u << (((byte)[1]) & 0x1F)))

/* A 3 byte UTF-8 representation splits the characters 16 bits between
   the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
   into pages, 3 bits to add to that index and 5 bits to generate the
   mask.  `byte` must point at unsigned char.
*/
#define UTF8_GET_NAMING3(pages, byte) \
  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                             + ((((byte)[1]) >> 2) & 0xF)] \
                       << 3) \
                      + ((((byte)[1]) & 3) << 1) \
                      + ((((byte)[2]) >> 5) & 1)] \
         & (1u << (((byte)[2]) & 0x1F)))

/* Dispatch on the sequence length n: 2 and 3 byte sequences are looked
   up in the bitmap, every other length evaluates to 0.
*/
#define UTF8_GET_NAMING(pages, p, n) \
  ((n) == 2 \
  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
  : ((n) == 3 \
     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
     : 0))

/* Detection of invalid UTF-8 sequences is based on Table 3.1B
   of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
   with the additional restriction of not allowing the Unicode
   code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
   Implementation details:
     (A & 0x80) == 0     means A < 0x80
   and
     (A & 0xC0) == 0xC0  means A > 0xBF

   Each macro evaluates to nonzero when the sequence starting at p is
   invalid.  p must point at unsigned char; it is always used as (p)[i]
   so that expressions such as `buf + 1` expand correctly.
*/

/* 2 byte sequence: lead below C2 is overlong; trail must be 80..BF. */
#define UTF8_INVALID2(p) \
  ((p)[0] < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)

/* 3 byte sequence: rejects EF,BF,BE / EF,BF,BF, E0 followed by a byte
   below A0, ED followed by a byte above 9F, and trail bytes outside
   80..BF.
*/
#define UTF8_INVALID3(p) \
  (((p)[2] & 0x80) == 0 \
  || \
  ((p)[0] == 0xEF && (p)[1] == 0xBF \
    ? \
    (p)[2] > 0xBD \
    : \
    ((p)[2] & 0xC0) == 0xC0) \
  || \
  ((p)[0] == 0xE0 \
    ? \
    (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
    : \
    ((p)[1] & 0x80) == 0 \
    || \
    ((p)[0] == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))

/* 4 byte sequence: rejects F0 followed by a byte below 90, F4 followed
   by a byte above 8F, and trail bytes outside 80..BF.
*/
#define UTF8_INVALID4(p) \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \
  || \
  ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \
  || \
  ((p)[0] == 0xF0 \
    ? \
    (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
    : \
    ((p)[1] & 0x80) == 0 \
    || \
    ((p)[0] == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
/* View a generic ENCODING pointer as the struct normal_encoding that
   embeds it as its first member (`enc`).  Constness is preserved.
*/
#define AS_NORMAL_ENCODING(enc)   ((const struct normal_encoding *) (enc))

#ifdef XML_MIN_SIZE

/* Initializers for the five function pointers that struct
   normal_encoding carries only in XML_MIN_SIZE builds.  E is the name
   prefix of the implementing functions (for example sb_ or little2_).
   Note the trailing comma: the expansion is meant to be followed
   directly by further initializers or by the closing brace.
*/
#define STANDARD_VTABLE(E) \
 E ## byteType, \
 E ## isNameMin, \
 E ## isNmstrtMin, \
 E ## byteToAscii, \
 E ## charMatches,

#else

/* Without XML_MIN_SIZE those members do not exist, so nothing is
   emitted.
*/
#define STANDARD_VTABLE(E) /* as nothing */

#endif

/* Initializers for the nine multi-byte predicates of struct
   normal_encoding (isName2..4, isNmstrt2..4, isInvalid2..4), in
   declaration order.  E is the name prefix, for example utf8_.
*/
#define NORMAL_VTABLE(E) \
 E ## isName2, \
 E ## isName3, \
 E ## isName4, \
 E ## isNmstrt2, \
 E ## isNmstrt3, \
 E ## isNmstrt4, \
 E ## isInvalid2, \
 E ## isInvalid3, \
 E ## isInvalid4
/* Accessor macros used by the xmltok_impl.c instantiation for encodings
   whose minimum character size is one byte.  In XML_MIN_SIZE builds
   every accessor goes through a function pointer stored in struct
   normal_encoding; otherwise the accessors are expanded inline.
*/

#ifdef XML_MIN_SIZE
#define MINBPC(enc) ((enc)->minBytesPerChar)
#else
/* minimum bytes per character */
#define MINBPC(enc) 1
#endif

/* Look up the byte type of *p in the encoding's 256-entry type table.
   The table is only read, so the const-preserving AS_NORMAL_ENCODING
   view is used instead of casting constness away.
*/
#define SB_BYTE_TYPE(enc, p) \
  (AS_NORMAL_ENCODING(enc)->type[(unsigned char)*(p)])

#ifdef XML_MIN_SIZE
/* Function form of SB_BYTE_TYPE, installed as byteType. */
static int PTRFASTCALL
sb_byteType(const ENCODING *enc, const char *p)
{
  return SB_BYTE_TYPE(enc, p);
}
#define BYTE_TYPE(enc, p) \
 (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
#else
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif

#ifdef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) \
 (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
/* Single-byte encodings: the byte at p is returned as is. */
static int PTRFASTCALL
sb_byteToAscii(const ENCODING *enc, const char *p)
{
  return *p;
}
#else
#define BYTE_TO_ASCII(enc, p) (*(p))
#endif

/* n is the byte length of the sequence (2, 3 or 4); it is pasted onto
   the member name to select the matching predicate.
*/
#define IS_NAME_CHAR(enc, p, n) \
 (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) \
 (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) \
 (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))

#ifdef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) \
 (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
 (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
#else
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#endif

#ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \
 (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
/* Nonzero when the byte at p equals c. */
static int PTRCALL
sb_charMatches(const ENCODING *enc, const char *p, int c)
{
  return *p == c;
}
#else
/* c is an ASCII character; it is parenthesized so that an expression
   argument cannot re-associate with the comparison. */
#define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#endif
+ 1.300 +#define PREFIX(ident) normal_ ## ident 1.301 +#include "xmltok_impl.c" 1.302 + 1.303 +#undef MINBPC 1.304 +#undef BYTE_TYPE 1.305 +#undef BYTE_TO_ASCII 1.306 +#undef CHAR_MATCHES 1.307 +#undef IS_NAME_CHAR 1.308 +#undef IS_NAME_CHAR_MINBPC 1.309 +#undef IS_NMSTRT_CHAR 1.310 +#undef IS_NMSTRT_CHAR_MINBPC 1.311 +#undef IS_INVALID_CHAR 1.312 + 1.313 +enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ 1.314 + UTF8_cval1 = 0x00, 1.315 + UTF8_cval2 = 0xc0, 1.316 + UTF8_cval3 = 0xe0, 1.317 + UTF8_cval4 = 0xf0 1.318 +}; 1.319 + 1.320 +static void PTRCALL 1.321 +utf8_toUtf8(const ENCODING *enc, 1.322 + const char **fromP, const char *fromLim, 1.323 + char **toP, const char *toLim) 1.324 +{ 1.325 + char *to; 1.326 + const char *from; 1.327 + if (fromLim - *fromP > toLim - *toP) { 1.328 + /* Avoid copying partial characters. */ 1.329 + for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) 1.330 + if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) 1.331 + break; 1.332 + } 1.333 + for (to = *toP, from = *fromP; from != fromLim; from++, to++) 1.334 + *to = *from; 1.335 + *fromP = from; 1.336 + *toP = to; 1.337 +} 1.338 + 1.339 +static void PTRCALL 1.340 +utf8_toUtf16(const ENCODING *enc, 1.341 + const char **fromP, const char *fromLim, 1.342 + unsigned short **toP, const unsigned short *toLim) 1.343 +{ 1.344 + unsigned short *to = *toP; 1.345 + const char *from = *fromP; 1.346 + while (from != fromLim && to != toLim) { 1.347 + switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { 1.348 + case BT_LEAD2: 1.349 + *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); 1.350 + from += 2; 1.351 + break; 1.352 + case BT_LEAD3: 1.353 + *to++ = (unsigned short)(((from[0] & 0xf) << 12) 1.354 + | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); 1.355 + from += 3; 1.356 + break; 1.357 + case BT_LEAD4: 1.358 + { 1.359 + unsigned long n; 1.360 + if (to + 1 == toLim) 1.361 + goto after; 1.362 + n = ((from[0] & 0x7) << 18) 
| ((from[1] & 0x3f) << 12) 1.363 + | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); 1.364 + n -= 0x10000; 1.365 + to[0] = (unsigned short)((n >> 10) | 0xD800); 1.366 + to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); 1.367 + to += 2; 1.368 + from += 4; 1.369 + } 1.370 + break; 1.371 + default: 1.372 + *to++ = *from++; 1.373 + break; 1.374 + } 1.375 + } 1.376 +after: 1.377 + *fromP = from; 1.378 + *toP = to; 1.379 +} 1.380 + 1.381 +#ifdef XML_NS 1.382 +static const struct normal_encoding utf8_encoding_ns = { 1.383 + { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 1.384 + { 1.385 +#include "asciitab.h" 1.386 +#include "utf8tab.h" 1.387 + }, 1.388 + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 1.389 +}; 1.390 +#endif 1.391 + 1.392 +static const struct normal_encoding utf8_encoding = { 1.393 + { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 1.394 + { 1.395 +#define BT_COLON BT_NMSTRT 1.396 +#include "asciitab.h" 1.397 +#undef BT_COLON 1.398 +#include "utf8tab.h" 1.399 + }, 1.400 + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 1.401 +}; 1.402 + 1.403 +#ifdef XML_NS 1.404 + 1.405 +static const struct normal_encoding internal_utf8_encoding_ns = { 1.406 + { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 1.407 + { 1.408 +#include "iasciitab.h" 1.409 +#include "utf8tab.h" 1.410 + }, 1.411 + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 1.412 +}; 1.413 + 1.414 +#endif 1.415 + 1.416 +static const struct normal_encoding internal_utf8_encoding = { 1.417 + { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, 1.418 + { 1.419 +#define BT_COLON BT_NMSTRT 1.420 +#include "iasciitab.h" 1.421 +#undef BT_COLON 1.422 +#include "utf8tab.h" 1.423 + }, 1.424 + STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) 1.425 +}; 1.426 + 1.427 +static void PTRCALL 1.428 +latin1_toUtf8(const ENCODING *enc, 1.429 + const char **fromP, const char *fromLim, 1.430 + char **toP, const char *toLim) 1.431 +{ 1.432 + for (;;) { 1.433 + unsigned char c; 1.434 + if (*fromP == fromLim) 1.435 + break; 1.436 + c = (unsigned char)**fromP; 
1.437 + if (c & 0x80) { 1.438 + if (toLim - *toP < 2) 1.439 + break; 1.440 + *(*toP)++ = (char)((c >> 6) | UTF8_cval2); 1.441 + *(*toP)++ = (char)((c & 0x3f) | 0x80); 1.442 + (*fromP)++; 1.443 + } 1.444 + else { 1.445 + if (*toP == toLim) 1.446 + break; 1.447 + *(*toP)++ = *(*fromP)++; 1.448 + } 1.449 + } 1.450 +} 1.451 + 1.452 +static void PTRCALL 1.453 +latin1_toUtf16(const ENCODING *enc, 1.454 + const char **fromP, const char *fromLim, 1.455 + unsigned short **toP, const unsigned short *toLim) 1.456 +{ 1.457 + while (*fromP != fromLim && *toP != toLim) 1.458 + *(*toP)++ = (unsigned char)*(*fromP)++; 1.459 +} 1.460 + 1.461 +#ifdef XML_NS 1.462 + 1.463 +static const struct normal_encoding latin1_encoding_ns = { 1.464 + { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, 1.465 + { 1.466 +#include "asciitab.h" 1.467 +#include "latin1tab.h" 1.468 + }, 1.469 + STANDARD_VTABLE(sb_) 1.470 +}; 1.471 + 1.472 +#endif 1.473 + 1.474 +static const struct normal_encoding latin1_encoding = { 1.475 + { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, 1.476 + { 1.477 +#define BT_COLON BT_NMSTRT 1.478 +#include "asciitab.h" 1.479 +#undef BT_COLON 1.480 +#include "latin1tab.h" 1.481 + }, 1.482 + STANDARD_VTABLE(sb_) 1.483 +}; 1.484 + 1.485 +static void PTRCALL 1.486 +ascii_toUtf8(const ENCODING *enc, 1.487 + const char **fromP, const char *fromLim, 1.488 + char **toP, const char *toLim) 1.489 +{ 1.490 + while (*fromP != fromLim && *toP != toLim) 1.491 + *(*toP)++ = *(*fromP)++; 1.492 +} 1.493 + 1.494 +#ifdef XML_NS 1.495 + 1.496 +static const struct normal_encoding ascii_encoding_ns = { 1.497 + { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, 1.498 + { 1.499 +#include "asciitab.h" 1.500 +/* BT_NONXML == 0 */ 1.501 + }, 1.502 + STANDARD_VTABLE(sb_) 1.503 +}; 1.504 + 1.505 +#endif 1.506 + 1.507 +static const struct normal_encoding ascii_encoding = { 1.508 + { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, 1.509 + { 1.510 +#define BT_COLON BT_NMSTRT 1.511 +#include 
/* Generate E##toUtf8: convert UTF-16 (byte order given by the GET_LO /
   GET_HI macros in effect at the point of instantiation) from
   [*fromP, fromLim) to UTF-8 in [*toP, toLim).

   A character is converted only if all of its output bytes fit; when
   they do not, *fromP is left at that character and the function
   returns.  Input is consumed in whole 2-byte code units.  A trailing
   odd byte, or a high surrogate whose second code unit lies beyond
   fromLim, cannot be converted and cannot be reported through this
   interface; it is consumed without output, so that nothing is read
   past fromLim and callers looping until *fromP == fromLim terminate.
   (The previous loop tested `from != fromLim` while stepping by 2 and
   read the second surrogate unconditionally.)
*/
#define DEFINE_UTF16_TO_UTF8(E) \
static void PTRCALL \
E ## toUtf8(const ENCODING *enc, \
            const char **fromP, const char *fromLim, \
            char **toP, const char *toLim) \
{ \
  const char *from; \
  for (from = *fromP; fromLim - from >= 2; from += 2) { \
    int plane; \
    unsigned char lo2; \
    unsigned char lo = GET_LO(from); \
    unsigned char hi = GET_HI(from); \
    switch (hi) { \
    case 0: \
      if (lo < 0x80) { \
        if (*toP == toLim) { \
          *fromP = from; \
          return; \
        } \
        *(*toP)++ = lo; \
        break; \
      } \
      /* fall through */ \
    case 0x1: case 0x2: case 0x3: \
    case 0x4: case 0x5: case 0x6: case 0x7: \
      if (toLim -  *toP < 2) { \
        *fromP = from; \
        return; \
      } \
      *(*toP)++ = ((lo >> 6) | (hi << 2) |  UTF8_cval2); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
      if (fromLim - from < 4) { \
        /* high surrogate without its second unit: discard */ \
        *fromP = fromLim; \
        return; \
      } \
      if (toLim -  *toP < 4) { \
        *fromP = from; \
        return; \
      } \
      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
      *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
      *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
      from += 2; \
      lo2 = GET_LO(from); \
      *(*toP)++ = (((lo & 0x3) << 4) \
                   | ((GET_HI(from) & 0x3) << 2) \
                   | (lo2 >> 6) \
                   | 0x80); \
      *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
      break; \
    default: \
      if (toLim -  *toP < 3)  { \
        *fromP = from; \
        return; \
      } \
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
      *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
      *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    } \
  } \
  /* all whole units consumed; a stray odd byte is skipped */ \
  *fromP = fromLim; \
}

/* Generate E##toUtf16: copy UTF-16 code units from [*fromP, fromLim)
   (byte order given by GET_LO / GET_HI) into native unsigned shorts in
   [*toP, toLim).

   Only whole 2-byte units are read.  When the output fills up first,
   copying stops at its capacity, and one unit earlier if the last unit
   that fits is a high surrogate (D800..DBFF), so that a pair is never
   split.  (The previous guard looked at the last unit of the input,
   which the loop cannot reach in that situation, so it had no effect.)
   A stray odd byte after the last whole unit is consumed without
   output once everything before it has been copied.
*/
#define DEFINE_UTF16_TO_UTF16(E) \
static void PTRCALL \
E ## toUtf16(const ENCODING *enc, \
             const char **fromP, const char *fromLim, \
             unsigned short **toP, const unsigned short *toLim) \
{ \
  /* end of the last whole code unit */ \
  const char *evenLim = *fromP + (((fromLim - *fromP) >> 1) << 1); \
  const char *copyLim = evenLim; \
  if (copyLim - *fromP > ((toLim - *toP) << 1)) { \
    copyLim = *fromP + ((toLim - *toP) << 1); \
    /* Avoid copying first half only of surrogate */ \
    if (copyLim != *fromP && (GET_HI(copyLim - 2) & 0xFC) == 0xD8) \
      copyLim -= 2; \
  } \
  for (; *fromP != copyLim; *fromP += 2) \
    *(*toP)++ = (unsigned short)((GET_HI(*fromP) << 8) | GET_LO(*fromP)); \
  if (*fromP == evenLim) \
    *fromP = fromLim; \
}
/* Little-endian 2-byte accessors: (p)[0] is the low byte and (p)[1] the
   high byte of the code unit at p.  The enc argument is not used by
   these four macros.  All macro arguments are parenthesized so that
   expression arguments expand as intended.
*/

/* The low byte when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
/* Nonzero when the code unit at p equals c (high byte zero, low byte c). */
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == (c))
/* Bitmap lookups for the code unit at p in namePages / nmstrtPages. */
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#ifdef XML_NS

/* Little-endian 2-byte encoding, namespace-aware variant (only built
   with XML_NS).
   First member: the embedded ENCODING, initialized with the VTABLE
   function list followed by three scalars.  The 2 agrees with
   MINBPC(enc) being 2 for this encoding; the remaining two are
   presumably the isUtf8 / isUtf16 flags of ENCODING, the last one being
   1 only when BYTEORDER is 1234 -- TODO confirm against xmltok.h.
   Second member: the 256-entry type[] table, filled from asciitab.h and
   latin1tab.h.
   STANDARD_VTABLE(little2_) supplies the XML_MIN_SIZE-only function
   pointers and expands to nothing otherwise.
*/
static const struct normal_encoding little2_encoding_ns = {
  { VTABLE, 2, 0,
#if BYTEORDER == 1234
    1
#else
    0
#endif
  },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};

#endif

/* Little-endian 2-byte encoding, non-namespace variant.  Identical to
   the _ns table except that BT_COLON is defined as BT_NMSTRT while
   asciitab.h is included, so whatever entry asciitab.h marks as
   BT_COLON (presumably ':') is classified as an ordinary name-start
   character.
*/
static const struct normal_encoding little2_encoding = {
  { VTABLE, 2, 0,
#if BYTEORDER == 1234
    1
#else
    0
#endif
  },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};
/* Big-endian 2-byte accessors: (p)[0] is the high byte and (p)[1] the
   low byte of the code unit at p.  All macro arguments are
   parenthesized so that expression arguments expand as intended.
*/

/* Byte type of the code unit at p: units with a zero high byte are
   looked up in the encoding's type table (read-only, hence the
   const-preserving AS_NORMAL_ENCODING view); all others are classified
   by unicode_byte_type(high, low).
*/
#define BIG2_BYTE_TYPE(enc, p) \
  ((p)[0] == 0 \
  ? AS_NORMAL_ENCODING(enc)->type[(unsigned char)(p)[1]] \
  : unicode_byte_type((p)[0], (p)[1]))
/* The low byte when the high byte is zero, otherwise -1. */
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
/* Nonzero when the code unit at p equals c (high byte zero, low byte c). */
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == (c))
/* Bitmap lookups for the code unit at p in namePages / nmstrtPages. */
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
#ifdef XML_NS

/* Big-endian 2-byte encoding, namespace-aware variant (only built with
   XML_NS).
   First member: the embedded ENCODING, initialized with the VTABLE
   function list followed by three scalars.  The 2 agrees with
   MINBPC(enc) being 2 for this encoding; the remaining two are
   presumably the isUtf8 / isUtf16 flags of ENCODING, the last one being
   1 only when BYTEORDER is 4321 -- TODO confirm against xmltok.h.
   Second member: the 256-entry type[] table, filled from asciitab.h and
   latin1tab.h.
   STANDARD_VTABLE(big2_) supplies the XML_MIN_SIZE-only function
   pointers and expands to nothing otherwise.
*/
static const struct normal_encoding big2_encoding_ns = {
  { VTABLE, 2, 0,
#if BYTEORDER == 4321
  1
#else
  0
#endif
  },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};

#endif

/* Big-endian 2-byte encoding, non-namespace variant.  Identical to the
   _ns table except that BT_COLON is defined as BT_NMSTRT while
   asciitab.h is included, so whatever entry asciitab.h marks as
   BT_COLON (presumably ':') is classified as an ordinary name-start
   character.
*/
static const struct normal_encoding big2_encoding = {
  { VTABLE, 2, 0,
#if BYTEORDER == 4321
  1
#else
  0
#endif
  },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};
1.907 +#undef BT_COLON 1.908 +#include "latin1tab.h" 1.909 + }, 1.910 + STANDARD_VTABLE(big2_) 1.911 +}; 1.912 + 1.913 +#endif 1.914 + 1.915 +#undef PREFIX 1.916 + 1.917 +static int FASTCALL 1.918 +streqci(const char *s1, const char *s2) 1.919 +{ 1.920 + for (;;) { 1.921 + char c1 = *s1++; 1.922 + char c2 = *s2++; 1.923 + if (ASCII_a <= c1 && c1 <= ASCII_z) 1.924 + c1 += ASCII_A - ASCII_a; 1.925 + if (ASCII_a <= c2 && c2 <= ASCII_z) 1.926 + c2 += ASCII_A - ASCII_a; 1.927 + if (c1 != c2) 1.928 + return 0; 1.929 + if (!c1) 1.930 + break; 1.931 + } 1.932 + return 1; 1.933 +} 1.934 + 1.935 +static void PTRCALL 1.936 +initUpdatePosition(const ENCODING *enc, const char *ptr, 1.937 + const char *end, POSITION *pos) 1.938 +{ 1.939 + normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); 1.940 +} 1.941 + 1.942 +static int 1.943 +toAscii(const ENCODING *enc, const char *ptr, const char *end) 1.944 +{ 1.945 + char buf[1]; 1.946 + char *p = buf; 1.947 + XmlUtf8Convert(enc, &ptr, end, &p, p + 1); 1.948 + if (p == buf) 1.949 + return -1; 1.950 + else 1.951 + return buf[0]; 1.952 +} 1.953 + 1.954 +static int FASTCALL 1.955 +isSpace(int c) 1.956 +{ 1.957 + switch (c) { 1.958 + case 0x20: 1.959 + case 0xD: 1.960 + case 0xA: 1.961 + case 0x9: 1.962 + return 1; 1.963 + } 1.964 + return 0; 1.965 +} 1.966 + 1.967 +/* Return 1 if there's just optional white space or there's an S 1.968 + followed by name=val. 
/* Parse one pseudo-attribute (S name S? '=' S? quoted-value) from
   [ptr, end), stepping through the input enc->minBytesPerChar bytes at
   a time and classifying each character with toAscii().

   Returns 1 if there's just optional white space (or nothing) left; in
   that case *namePtr is set to NULL and the other outputs are not
   written.  Also returns 1 if there's an S followed by name=val; then
     *namePtr    points at the first character of the name,
     *nameEndPtr points just past the last character of the name,
     *valPtr     points at the first character after the opening quote,
     *nextTokPtr points just past the closing quote.
   Returns 0 on a syntax error, with *nextTokPtr pointing at the
   offending position.
*/
static int
parsePseudoAttribute(const ENCODING *enc,
                     const char *ptr,
                     const char *end,
                     const char **namePtr,
                     const char **nameEndPtr,
                     const char **valPtr,
                     const char **nextTokPtr)
{
  int c;
  char open;
  /* nothing left at all */
  if (ptr == end) {
    *namePtr = NULL;
    return 1;
  }
  /* a pseudo-attribute must be preceded by white space */
  if (!isSpace(toAscii(enc, ptr, end))) {
    *nextTokPtr = ptr;
    return 0;
  }
  /* skip the leading white space */
  do {
    ptr += enc->minBytesPerChar;
  } while (isSpace(toAscii(enc, ptr, end)));
  /* only white space was left */
  if (ptr == end) {
    *namePtr = NULL;
    return 1;
  }
  *namePtr = ptr;
  /* scan the name up to '=' or to white space followed by '=' */
  for (;;) {
    c = toAscii(enc, ptr, end);
    /* toAscii() yields -1 when it could not produce a byte */
    if (c == -1) {
      *nextTokPtr = ptr;
      return 0;
    }
    if (c == ASCII_EQUALS) {
      *nameEndPtr = ptr;
      break;
    }
    if (isSpace(c)) {
      *nameEndPtr = ptr;
      do {
        ptr += enc->minBytesPerChar;
      } while (isSpace(c = toAscii(enc, ptr, end)));
      /* after the name and white space only '=' is acceptable */
      if (c != ASCII_EQUALS) {
        *nextTokPtr = ptr;
        return 0;
      }
      break;
    }
    ptr += enc->minBytesPerChar;
  }
  /* reached only when '=' is the first character: empty name */
  if (ptr == *namePtr) {
    *nextTokPtr = ptr;
    return 0;
  }
  /* step over '=' and any white space that follows it */
  ptr += enc->minBytesPerChar;
  c = toAscii(enc, ptr, end);
  while (isSpace(c)) {
    ptr += enc->minBytesPerChar;
    c = toAscii(enc, ptr, end);
  }
  /* the value must be quoted with " or ' */
  if (c != ASCII_QUOT && c != ASCII_APOS) {
    *nextTokPtr = ptr;
    return 0;
  }
  /* remember which quote opened the value; the same one must close it */
  open = (char)c;
  ptr += enc->minBytesPerChar;
  *valPtr = ptr;
  /* the value may contain only letters, digits, '.', '-' and '_' */
  for (;; ptr += enc->minBytesPerChar) {
    c = toAscii(enc, ptr, end);
    if (c == open)
      break;
    if (!(ASCII_a <= c && c <= ASCII_z)
        && !(ASCII_A <= c && c <= ASCII_Z)
        && !(ASCII_0 <= c && c <= ASCII_9)
        && c != ASCII_PERIOD
        && c != ASCII_MINUS
        && c != ASCII_UNDERSCORE) {
      *nextTokPtr = ptr;
      return 0;
    }
  }
  /* continue after the closing quote */
  *nextTokPtr = ptr + enc->minBytesPerChar;
  return 1;
}
{ 1.1108 + if (!isGeneralTextEntity) { 1.1109 + *badPtr = name; 1.1110 + return 0; 1.1111 + } 1.1112 + } 1.1113 + else { 1.1114 + if (versionPtr) 1.1115 + *versionPtr = val; 1.1116 + if (versionEndPtr) 1.1117 + *versionEndPtr = ptr; 1.1118 +/* BEGIN MOZILLA CHANGE (http://bugzilla.mozilla.org/show_bug.cgi?id=62157) */ 1.1119 + /* Anything else but a version="1.0" is invalid for us, until we support later versions. */ 1.1120 + if (!XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_XML_1_0)) { 1.1121 + *badPtr = val; 1.1122 + return 0; 1.1123 + } 1.1124 +/* END MOZILLA CHANGE */ 1.1125 + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { 1.1126 + *badPtr = ptr; 1.1127 + return 0; 1.1128 + } 1.1129 + if (!name) { 1.1130 + if (isGeneralTextEntity) { 1.1131 + /* a TextDecl must have an EncodingDecl */ 1.1132 + *badPtr = ptr; 1.1133 + return 0; 1.1134 + } 1.1135 + return 1; 1.1136 + } 1.1137 + } 1.1138 + if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { 1.1139 + int c = toAscii(enc, val, end); 1.1140 + if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) { 1.1141 + *badPtr = val; 1.1142 + return 0; 1.1143 + } 1.1144 + if (encodingName) 1.1145 + *encodingName = val; 1.1146 + if (encoding) 1.1147 + *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); 1.1148 + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { 1.1149 + *badPtr = ptr; 1.1150 + return 0; 1.1151 + } 1.1152 + if (!name) 1.1153 + return 1; 1.1154 + } 1.1155 + if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) 1.1156 + || isGeneralTextEntity) { 1.1157 + *badPtr = name; 1.1158 + return 0; 1.1159 + } 1.1160 + if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { 1.1161 + if (standalone) 1.1162 + *standalone = 1; 1.1163 + } 1.1164 + else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { 1.1165 + if (standalone) 1.1166 + *standalone = 0; 1.1167 + } 1.1168 + else { 1.1169 + 
*badPtr = val; 1.1170 + return 0; 1.1171 + } 1.1172 + while (isSpace(toAscii(enc, ptr, end))) 1.1173 + ptr += enc->minBytesPerChar; 1.1174 + if (ptr != end) { 1.1175 + *badPtr = ptr; 1.1176 + return 0; 1.1177 + } 1.1178 + return 1; 1.1179 +} 1.1180 + 1.1181 +static int FASTCALL 1.1182 +checkCharRefNumber(int result) 1.1183 +{ 1.1184 + switch (result >> 8) { 1.1185 + case 0xD8: case 0xD9: case 0xDA: case 0xDB: 1.1186 + case 0xDC: case 0xDD: case 0xDE: case 0xDF: 1.1187 + return -1; 1.1188 + case 0: 1.1189 + if (latin1_encoding.type[result] == BT_NONXML) 1.1190 + return -1; 1.1191 + break; 1.1192 + case 0xFF: 1.1193 + if (result == 0xFFFE || result == 0xFFFF) 1.1194 + return -1; 1.1195 + break; 1.1196 + } 1.1197 + return result; 1.1198 +} 1.1199 + 1.1200 +int FASTCALL 1.1201 +XmlUtf8Encode(int c, char *buf) 1.1202 +{ 1.1203 + enum { 1.1204 + /* minN is minimum legal resulting value for N byte sequence */ 1.1205 + min2 = 0x80, 1.1206 + min3 = 0x800, 1.1207 + min4 = 0x10000 1.1208 + }; 1.1209 + 1.1210 + if (c < 0) 1.1211 + return 0; 1.1212 + if (c < min2) { 1.1213 + buf[0] = (char)(c | UTF8_cval1); 1.1214 + return 1; 1.1215 + } 1.1216 + if (c < min3) { 1.1217 + buf[0] = (char)((c >> 6) | UTF8_cval2); 1.1218 + buf[1] = (char)((c & 0x3f) | 0x80); 1.1219 + return 2; 1.1220 + } 1.1221 + if (c < min4) { 1.1222 + buf[0] = (char)((c >> 12) | UTF8_cval3); 1.1223 + buf[1] = (char)(((c >> 6) & 0x3f) | 0x80); 1.1224 + buf[2] = (char)((c & 0x3f) | 0x80); 1.1225 + return 3; 1.1226 + } 1.1227 + if (c < 0x110000) { 1.1228 + buf[0] = (char)((c >> 18) | UTF8_cval4); 1.1229 + buf[1] = (char)(((c >> 12) & 0x3f) | 0x80); 1.1230 + buf[2] = (char)(((c >> 6) & 0x3f) | 0x80); 1.1231 + buf[3] = (char)((c & 0x3f) | 0x80); 1.1232 + return 4; 1.1233 + } 1.1234 + return 0; 1.1235 +} 1.1236 + 1.1237 +int FASTCALL 1.1238 +XmlUtf16Encode(int charNum, unsigned short *buf) 1.1239 +{ 1.1240 + if (charNum < 0) 1.1241 + return 0; 1.1242 + if (charNum < 0x10000) { 1.1243 + buf[0] = (unsigned short)charNum; 
1.1244 + return 1; 1.1245 + } 1.1246 + if (charNum < 0x110000) { 1.1247 + charNum -= 0x10000; 1.1248 + buf[0] = (unsigned short)((charNum >> 10) + 0xD800); 1.1249 + buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00); 1.1250 + return 2; 1.1251 + } 1.1252 + return 0; 1.1253 +} 1.1254 + 1.1255 +struct unknown_encoding { 1.1256 + struct normal_encoding normal; 1.1257 + CONVERTER convert; 1.1258 + void *userData; 1.1259 + unsigned short utf16[256]; 1.1260 + char utf8[256][4]; 1.1261 +}; 1.1262 + 1.1263 +#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc)) 1.1264 + 1.1265 +int 1.1266 +XmlSizeOfUnknownEncoding(void) 1.1267 +{ 1.1268 + return sizeof(struct unknown_encoding); 1.1269 +} 1.1270 + 1.1271 +static int PTRFASTCALL 1.1272 +unknown_isName(const ENCODING *enc, const char *p) 1.1273 +{ 1.1274 + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1.1275 + int c = uenc->convert(uenc->userData, p); 1.1276 + if (c & ~0xFFFF) 1.1277 + return 0; 1.1278 + return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); 1.1279 +} 1.1280 + 1.1281 +static int PTRFASTCALL 1.1282 +unknown_isNmstrt(const ENCODING *enc, const char *p) 1.1283 +{ 1.1284 + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1.1285 + int c = uenc->convert(uenc->userData, p); 1.1286 + if (c & ~0xFFFF) 1.1287 + return 0; 1.1288 + return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); 1.1289 +} 1.1290 + 1.1291 +static int PTRFASTCALL 1.1292 +unknown_isInvalid(const ENCODING *enc, const char *p) 1.1293 +{ 1.1294 + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1.1295 + int c = uenc->convert(uenc->userData, p); 1.1296 + return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; 1.1297 +} 1.1298 + 1.1299 +static void PTRCALL 1.1300 +unknown_toUtf8(const ENCODING *enc, 1.1301 + const char **fromP, const char *fromLim, 1.1302 + char **toP, const char *toLim) 1.1303 +{ 1.1304 + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1.1305 + char 
buf[XML_UTF8_ENCODE_MAX]; 1.1306 + for (;;) { 1.1307 + const char *utf8; 1.1308 + int n; 1.1309 + if (*fromP == fromLim) 1.1310 + break; 1.1311 + utf8 = uenc->utf8[(unsigned char)**fromP]; 1.1312 + n = *utf8++; 1.1313 + if (n == 0) { 1.1314 + int c = uenc->convert(uenc->userData, *fromP); 1.1315 + n = XmlUtf8Encode(c, buf); 1.1316 + if (n > toLim - *toP) 1.1317 + break; 1.1318 + utf8 = buf; 1.1319 + *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] 1.1320 + - (BT_LEAD2 - 2)); 1.1321 + } 1.1322 + else { 1.1323 + if (n > toLim - *toP) 1.1324 + break; 1.1325 + (*fromP)++; 1.1326 + } 1.1327 + do { 1.1328 + *(*toP)++ = *utf8++; 1.1329 + } while (--n != 0); 1.1330 + } 1.1331 +} 1.1332 + 1.1333 +static void PTRCALL 1.1334 +unknown_toUtf16(const ENCODING *enc, 1.1335 + const char **fromP, const char *fromLim, 1.1336 + unsigned short **toP, const unsigned short *toLim) 1.1337 +{ 1.1338 + const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 1.1339 + while (*fromP != fromLim && *toP != toLim) { 1.1340 + unsigned short c = uenc->utf16[(unsigned char)**fromP]; 1.1341 + if (c == 0) { 1.1342 + c = (unsigned short) 1.1343 + uenc->convert(uenc->userData, *fromP); 1.1344 + *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] 1.1345 + - (BT_LEAD2 - 2)); 1.1346 + } 1.1347 + else 1.1348 + (*fromP)++; 1.1349 + *(*toP)++ = c; 1.1350 + } 1.1351 +} 1.1352 + 1.1353 +ENCODING * 1.1354 +XmlInitUnknownEncoding(void *mem, 1.1355 + int *table, 1.1356 + CONVERTER convert, 1.1357 + void *userData) 1.1358 +{ 1.1359 + int i; 1.1360 + struct unknown_encoding *e = (struct unknown_encoding *)mem; 1.1361 + for (i = 0; i < (int)sizeof(struct normal_encoding); i++) 1.1362 + ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; 1.1363 + for (i = 0; i < 128; i++) 1.1364 + if (latin1_encoding.type[i] != BT_OTHER 1.1365 + && latin1_encoding.type[i] != BT_NONXML 1.1366 + && table[i] != i) 1.1367 + return 0; 1.1368 + for (i = 0; i < 256; i++) { 1.1369 + int c = table[i]; 1.1370 
+ if (c == -1) { 1.1371 + e->normal.type[i] = BT_MALFORM; 1.1372 + /* This shouldn't really get used. */ 1.1373 + e->utf16[i] = 0xFFFF; 1.1374 + e->utf8[i][0] = 1; 1.1375 + e->utf8[i][1] = 0; 1.1376 + } 1.1377 + else if (c < 0) { 1.1378 + if (c < -4) 1.1379 + return 0; 1.1380 + e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); 1.1381 + e->utf8[i][0] = 0; 1.1382 + e->utf16[i] = 0; 1.1383 + } 1.1384 + else if (c < 0x80) { 1.1385 + if (latin1_encoding.type[c] != BT_OTHER 1.1386 + && latin1_encoding.type[c] != BT_NONXML 1.1387 + && c != i) 1.1388 + return 0; 1.1389 + e->normal.type[i] = latin1_encoding.type[c]; 1.1390 + e->utf8[i][0] = 1; 1.1391 + e->utf8[i][1] = (char)c; 1.1392 + e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); 1.1393 + } 1.1394 + else if (checkCharRefNumber(c) < 0) { 1.1395 + e->normal.type[i] = BT_NONXML; 1.1396 + /* This shouldn't really get used. */ 1.1397 + e->utf16[i] = 0xFFFF; 1.1398 + e->utf8[i][0] = 1; 1.1399 + e->utf8[i][1] = 0; 1.1400 + } 1.1401 + else { 1.1402 + if (c > 0xFFFF) 1.1403 + return 0; 1.1404 + if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) 1.1405 + e->normal.type[i] = BT_NMSTRT; 1.1406 + else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) 1.1407 + e->normal.type[i] = BT_NAME; 1.1408 + else 1.1409 + e->normal.type[i] = BT_OTHER; 1.1410 + e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); 1.1411 + e->utf16[i] = (unsigned short)c; 1.1412 + } 1.1413 + } 1.1414 + e->userData = userData; 1.1415 + e->convert = convert; 1.1416 + if (convert) { 1.1417 + e->normal.isName2 = unknown_isName; 1.1418 + e->normal.isName3 = unknown_isName; 1.1419 + e->normal.isName4 = unknown_isName; 1.1420 + e->normal.isNmstrt2 = unknown_isNmstrt; 1.1421 + e->normal.isNmstrt3 = unknown_isNmstrt; 1.1422 + e->normal.isNmstrt4 = unknown_isNmstrt; 1.1423 + e->normal.isInvalid2 = unknown_isInvalid; 1.1424 + e->normal.isInvalid3 = unknown_isInvalid; 1.1425 + e->normal.isInvalid4 = unknown_isInvalid; 1.1426 + } 1.1427 + 
e->normal.enc.utf8Convert = unknown_toUtf8; 1.1428 + e->normal.enc.utf16Convert = unknown_toUtf16; 1.1429 + return &(e->normal.enc); 1.1430 +} 1.1431 + 1.1432 +/* If this enumeration is changed, getEncodingIndex and encodings 1.1433 +must also be changed. */ 1.1434 +enum { 1.1435 + UNKNOWN_ENC = -1, 1.1436 + ISO_8859_1_ENC = 0, 1.1437 + US_ASCII_ENC, 1.1438 + UTF_8_ENC, 1.1439 + UTF_16_ENC, 1.1440 + UTF_16BE_ENC, 1.1441 + UTF_16LE_ENC, 1.1442 + /* must match encodingNames up to here */ 1.1443 + NO_ENC 1.1444 +}; 1.1445 + 1.1446 +static const char KW_ISO_8859_1[] = { 1.1447 + ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, 1.1448 + ASCII_MINUS, ASCII_1, '\0' 1.1449 +}; 1.1450 +static const char KW_US_ASCII[] = { 1.1451 + ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, 1.1452 + '\0' 1.1453 +}; 1.1454 +static const char KW_UTF_8[] = { 1.1455 + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' 1.1456 +}; 1.1457 +static const char KW_UTF_16[] = { 1.1458 + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' 1.1459 +}; 1.1460 +static const char KW_UTF_16BE[] = { 1.1461 + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, 1.1462 + '\0' 1.1463 +}; 1.1464 +static const char KW_UTF_16LE[] = { 1.1465 + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, 1.1466 + '\0' 1.1467 +}; 1.1468 + 1.1469 +static int FASTCALL 1.1470 +getEncodingIndex(const char *name) 1.1471 +{ 1.1472 + static const char * const encodingNames[] = { 1.1473 + KW_ISO_8859_1, 1.1474 + KW_US_ASCII, 1.1475 + KW_UTF_8, 1.1476 + KW_UTF_16, 1.1477 + KW_UTF_16BE, 1.1478 + KW_UTF_16LE, 1.1479 + }; 1.1480 + int i; 1.1481 + if (name == NULL) 1.1482 + return NO_ENC; 1.1483 + for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++) 1.1484 + if (streqci(name, encodingNames[i])) 1.1485 + return i; 1.1486 + return UNKNOWN_ENC; 1.1487 +} 1.1488 + 1.1489 +/* For binary compatibility, we 
store the index of the encoding 1.1490 + specified at initialization in the isUtf16 member. 1.1491 +*/ 1.1492 + 1.1493 +#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) 1.1494 +#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) 1.1495 + 1.1496 +/* This is what detects the encoding. encodingTable maps from 1.1497 + encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of 1.1498 + the external (protocol) specified encoding; state is 1.1499 + XML_CONTENT_STATE if we're parsing an external text entity, and 1.1500 + XML_PROLOG_STATE otherwise. 1.1501 +*/ 1.1502 + 1.1503 + 1.1504 +static int 1.1505 +initScan(const ENCODING * const *encodingTable, 1.1506 + const INIT_ENCODING *enc, 1.1507 + int state, 1.1508 + const char *ptr, 1.1509 + const char *end, 1.1510 + const char **nextTokPtr) 1.1511 +{ 1.1512 + const ENCODING **encPtr; 1.1513 + 1.1514 + if (ptr == end) 1.1515 + return XML_TOK_NONE; 1.1516 + encPtr = enc->encPtr; 1.1517 + if (ptr + 1 == end) { 1.1518 + /* only a single byte available for auto-detection */ 1.1519 +#ifndef XML_DTD /* FIXME */ 1.1520 + /* a well-formed document entity must have more than one byte */ 1.1521 + if (state != XML_CONTENT_STATE) 1.1522 + return XML_TOK_PARTIAL; 1.1523 +#endif 1.1524 + /* so we're parsing an external text entity... 
*/ 1.1525 + /* if UTF-16 was externally specified, then we need at least 2 bytes */ 1.1526 + switch (INIT_ENC_INDEX(enc)) { 1.1527 + case UTF_16_ENC: 1.1528 + case UTF_16LE_ENC: 1.1529 + case UTF_16BE_ENC: 1.1530 + return XML_TOK_PARTIAL; 1.1531 + } 1.1532 + switch ((unsigned char)*ptr) { 1.1533 + case 0xFE: 1.1534 + case 0xFF: 1.1535 + case 0xEF: /* possibly first byte of UTF-8 BOM */ 1.1536 + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC 1.1537 + && state == XML_CONTENT_STATE) 1.1538 + break; 1.1539 + /* fall through */ 1.1540 + case 0x00: 1.1541 + case 0x3C: 1.1542 + return XML_TOK_PARTIAL; 1.1543 + } 1.1544 + } 1.1545 + else { 1.1546 + switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { 1.1547 + case 0xFEFF: 1.1548 + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC 1.1549 + && state == XML_CONTENT_STATE) 1.1550 + break; 1.1551 + *nextTokPtr = ptr + 2; 1.1552 + *encPtr = encodingTable[UTF_16BE_ENC]; 1.1553 + return XML_TOK_BOM; 1.1554 + /* 00 3C is handled in the default case */ 1.1555 + case 0x3C00: 1.1556 + if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC 1.1557 + || INIT_ENC_INDEX(enc) == UTF_16_ENC) 1.1558 + && state == XML_CONTENT_STATE) 1.1559 + break; 1.1560 + *encPtr = encodingTable[UTF_16LE_ENC]; 1.1561 + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 1.1562 + case 0xFFFE: 1.1563 + if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC 1.1564 + && state == XML_CONTENT_STATE) 1.1565 + break; 1.1566 + *nextTokPtr = ptr + 2; 1.1567 + *encPtr = encodingTable[UTF_16LE_ENC]; 1.1568 + return XML_TOK_BOM; 1.1569 + case 0xEFBB: 1.1570 + /* Maybe a UTF-8 BOM (EF BB BF) */ 1.1571 + /* If there's an explicitly specified (external) encoding 1.1572 + of ISO-8859-1 or some flavour of UTF-16 1.1573 + and this is an external text entity, 1.1574 + don't look for the BOM, 1.1575 + because it might be a legal data. 
1.1576 + */ 1.1577 + if (state == XML_CONTENT_STATE) { 1.1578 + int e = INIT_ENC_INDEX(enc); 1.1579 + if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC 1.1580 + || e == UTF_16LE_ENC || e == UTF_16_ENC) 1.1581 + break; 1.1582 + } 1.1583 + if (ptr + 2 == end) 1.1584 + return XML_TOK_PARTIAL; 1.1585 + if ((unsigned char)ptr[2] == 0xBF) { 1.1586 + *nextTokPtr = ptr + 3; 1.1587 + *encPtr = encodingTable[UTF_8_ENC]; 1.1588 + return XML_TOK_BOM; 1.1589 + } 1.1590 + break; 1.1591 + default: 1.1592 + if (ptr[0] == '\0') { 1.1593 + /* 0 isn't a legal data character. Furthermore a document 1.1594 + entity can only start with ASCII characters. So the only 1.1595 + way this can fail to be big-endian UTF-16 if it it's an 1.1596 + external parsed general entity that's labelled as 1.1597 + UTF-16LE. 1.1598 + */ 1.1599 + if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) 1.1600 + break; 1.1601 + *encPtr = encodingTable[UTF_16BE_ENC]; 1.1602 + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 1.1603 + } 1.1604 + else if (ptr[1] == '\0') { 1.1605 + /* We could recover here in the case: 1.1606 + - parsing an external entity 1.1607 + - second byte is 0 1.1608 + - no externally specified encoding 1.1609 + - no encoding declaration 1.1610 + by assuming UTF-16LE. But we don't, because this would mean when 1.1611 + presented just with a single byte, we couldn't reliably determine 1.1612 + whether we needed further bytes. 
1.1613 + */ 1.1614 + if (state == XML_CONTENT_STATE) 1.1615 + break; 1.1616 + *encPtr = encodingTable[UTF_16LE_ENC]; 1.1617 + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 1.1618 + } 1.1619 + break; 1.1620 + } 1.1621 + } 1.1622 + *encPtr = encodingTable[INIT_ENC_INDEX(enc)]; 1.1623 + return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 1.1624 +} 1.1625 + 1.1626 + 1.1627 +#define NS(x) x 1.1628 +#define ns(x) x 1.1629 +#include "xmltok_ns.c" 1.1630 +#undef NS 1.1631 +#undef ns 1.1632 + 1.1633 +#ifdef XML_NS 1.1634 + 1.1635 +#define NS(x) x ## NS 1.1636 +#define ns(x) x ## _ns 1.1637 + 1.1638 +#include "xmltok_ns.c" 1.1639 + 1.1640 +#undef NS 1.1641 +#undef ns 1.1642 + 1.1643 +ENCODING * 1.1644 +XmlInitUnknownEncodingNS(void *mem, 1.1645 + int *table, 1.1646 + CONVERTER convert, 1.1647 + void *userData) 1.1648 +{ 1.1649 + ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); 1.1650 + if (enc) 1.1651 + ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; 1.1652 + return enc; 1.1653 +} 1.1654 + 1.1655 +#endif /* XML_NS */ 1.1656 + 1.1657 +/* BEGIN MOZILLA CHANGE (Mozilla extensions for QName checking) */ 1.1658 +#ifdef MOZILLA_CLIENT 1.1659 +#include "moz_extensions.c" 1.1660 +#endif /* MOZILLA_CLIENT */ 1.1661 +/* END MOZILLA CHANGE */