michael@0: /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifdef IS_LITTLE_ENDIAN michael@0: michael@0: #define PREFIX(ident) little2_ ## ident michael@0: #define BYTE_TYPE(p) LITTLE2_BYTE_TYPE(XmlGetUtf16InternalEncodingNS(), p) michael@0: #define IS_NAME_CHAR_MINBPC(p) LITTLE2_IS_NAME_CHAR_MINBPC(0, p) michael@0: #define IS_NMSTRT_CHAR_MINBPC(p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(0, p) michael@0: michael@0: #else michael@0: michael@0: #define PREFIX(ident) big2_ ## ident michael@0: #define BYTE_TYPE(p) BIG2_BYTE_TYPE(XmlGetUtf16InternalEncodingNS(), p) michael@0: #define IS_NAME_CHAR_MINBPC(p) BIG2_IS_NAME_CHAR_MINBPC(0, p) michael@0: #define IS_NMSTRT_CHAR_MINBPC(p) BIG2_IS_NMSTRT_CHAR_MINBPC(0, p) michael@0: michael@0: #endif michael@0: michael@0: #define MOZ_EXPAT_VALID_QNAME (0) michael@0: #define MOZ_EXPAT_EMPTY_QNAME (1 << 0) michael@0: #define MOZ_EXPAT_INVALID_CHARACTER (1 << 1) michael@0: #define MOZ_EXPAT_MALFORMED (1 << 2) michael@0: michael@0: int MOZ_XMLCheckQName(const char* ptr, const char* end, int ns_aware, michael@0: const char** colon) michael@0: { michael@0: int result = MOZ_EXPAT_VALID_QNAME; michael@0: int nmstrt = 1; michael@0: *colon = 0; michael@0: if (ptr == end) { michael@0: return MOZ_EXPAT_EMPTY_QNAME; michael@0: } michael@0: do { michael@0: switch (BYTE_TYPE(ptr)) { michael@0: case BT_COLON: michael@0: /* We're namespace-aware and either first or last character is a colon michael@0: or we've already seen a colon. */ michael@0: if (ns_aware && (nmstrt || *colon || ptr + 2 == end)) { michael@0: return MOZ_EXPAT_MALFORMED; michael@0: } michael@0: *colon = ptr; michael@0: nmstrt = ns_aware; /* e.g. "a:0" should be valid if !ns_aware */ michael@0: break; michael@0: case BT_NONASCII: michael@0: if (nmstrt && !IS_NMSTRT_CHAR_MINBPC(ptr)) { michael@0: /* If this is a valid name character and we're namespace-aware, the michael@0: QName is malformed. Otherwise, this character's invalid at the michael@0: start of a name (or, if we're namespace-aware, at the start of a michael@0: localpart). */ michael@0: return (IS_NAME_CHAR_MINBPC(ptr) && ns_aware) ? michael@0: MOZ_EXPAT_MALFORMED : michael@0: MOZ_EXPAT_INVALID_CHARACTER; michael@0: } michael@0: if (!IS_NAME_CHAR_MINBPC(ptr)) { michael@0: return MOZ_EXPAT_INVALID_CHARACTER; michael@0: } michael@0: nmstrt = 0; michael@0: break; michael@0: case BT_NMSTRT: michael@0: case BT_HEX: michael@0: nmstrt = 0; michael@0: break; michael@0: case BT_DIGIT: michael@0: case BT_NAME: michael@0: case BT_MINUS: michael@0: if (nmstrt) { michael@0: return MOZ_EXPAT_INVALID_CHARACTER; michael@0: } michael@0: break; michael@0: default: michael@0: return MOZ_EXPAT_INVALID_CHARACTER; michael@0: } michael@0: ptr += 2; michael@0: } while (ptr != end); michael@0: return result; michael@0: } michael@0: michael@0: int MOZ_XMLIsLetter(const char* ptr) michael@0: { michael@0: switch (BYTE_TYPE(ptr)) { michael@0: case BT_NONASCII: michael@0: if (!IS_NMSTRT_CHAR_MINBPC(ptr)) { michael@0: return 0; michael@0: } michael@0: /* fall through */ michael@0: case BT_NMSTRT: michael@0: case BT_HEX: michael@0: return 1; michael@0: default: michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: int MOZ_XMLIsNCNameChar(const char* ptr) michael@0: { michael@0: switch (BYTE_TYPE(ptr)) { michael@0: case BT_NONASCII: michael@0: if (!IS_NAME_CHAR_MINBPC(ptr)) { michael@0: return 0; michael@0: } michael@0: /* fall through */ michael@0: case BT_NMSTRT: michael@0: case BT_HEX: michael@0: case BT_DIGIT: michael@0: case BT_NAME: michael@0: case BT_MINUS: michael@0: return 1; michael@0: default: michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: int MOZ_XMLTranslateEntity(const char* ptr, const char* end, const char** next, michael@0: XML_Char* result) michael@0: { michael@0: const ENCODING* enc = XmlGetUtf16InternalEncodingNS(); michael@0: int tok = PREFIX(scanRef)(enc, ptr, end, next); michael@0: if (tok <= XML_TOK_INVALID) { michael@0: return 0; michael@0: } michael@0: michael@0: if (tok == XML_TOK_CHAR_REF) { michael@0: int n = XmlCharRefNumber(enc, ptr); michael@0: michael@0: /* We could get away with just < 0, but better safe than sorry. */ michael@0: if (n <= 0) { michael@0: return 0; michael@0: } michael@0: michael@0: return XmlUtf16Encode(n, (unsigned short*)result); michael@0: } michael@0: michael@0: if (tok == XML_TOK_ENTITY_REF) { michael@0: /* *next points to after the semicolon, so the entity ends at michael@0: *next - enc->minBytesPerChar. */ michael@0: XML_Char ch = michael@0: (XML_Char)XmlPredefinedEntityName(enc, ptr, *next - enc->minBytesPerChar); michael@0: if (!ch) { michael@0: return 0; michael@0: } michael@0: michael@0: *result = ch; michael@0: return 1; michael@0: } michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: #undef PREFIX michael@0: #undef BYTE_TYPE michael@0: #undef IS_NAME_CHAR_MINBPC michael@0: #undef IS_NMSTRT_CHAR_MINBPC