michael@0: /* michael@0: * Copyright © 2011,2012 Google, Inc. michael@0: * michael@0: * This is part of HarfBuzz, a text shaping library. michael@0: * michael@0: * Permission is hereby granted, without written agreement and without michael@0: * license or royalty fees, to use, copy, modify, and distribute this michael@0: * software and its documentation for any purpose, provided that the michael@0: * above copyright notice and the following two paragraphs appear in michael@0: * all copies of this software. michael@0: * michael@0: * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR michael@0: * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES michael@0: * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN michael@0: * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH michael@0: * DAMAGE. michael@0: * michael@0: * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, michael@0: * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND michael@0: * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS michael@0: * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO michael@0: * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. michael@0: * michael@0: * Google Author(s): Behdad Esfahbod michael@0: */ michael@0: michael@0: #ifndef HB_UTF_PRIVATE_HH michael@0: #define HB_UTF_PRIVATE_HH michael@0: michael@0: #include "hb-private.hh" michael@0: michael@0: michael@0: /* UTF-8 */ michael@0: michael@0: #define HB_UTF8_COMPUTE(Char, Mask, Len) \ michael@0: if (Char < 128) { Len = 1; Mask = 0x7f; } \ michael@0: else if ((Char & 0xe0) == 0xc0) { Len = 2; Mask = 0x1f; } \ michael@0: else if ((Char & 0xf0) == 0xe0) { Len = 3; Mask = 0x0f; } \ michael@0: else if ((Char & 0xf8) == 0xf0) { Len = 4; Mask = 0x07; } \ michael@0: else Len = 0; michael@0: michael@0: static inline const uint8_t * michael@0: hb_utf_next (const uint8_t *text, michael@0: const uint8_t *end, michael@0: hb_codepoint_t *unicode) michael@0: { michael@0: hb_codepoint_t c = *text, mask; michael@0: unsigned int len; michael@0: michael@0: /* TODO check for overlong sequences? */ michael@0: michael@0: HB_UTF8_COMPUTE (c, mask, len); michael@0: if (unlikely (!len || (unsigned int) (end - text) < len)) { michael@0: *unicode = -1; michael@0: return text + 1; michael@0: } else { michael@0: hb_codepoint_t result; michael@0: unsigned int i; michael@0: result = c & mask; michael@0: for (i = 1; i < len; i++) michael@0: { michael@0: if (unlikely ((text[i] & 0xc0) != 0x80)) michael@0: { michael@0: *unicode = -1; michael@0: return text + 1; michael@0: } michael@0: result <<= 6; michael@0: result |= (text[i] & 0x3f); michael@0: } michael@0: *unicode = result; michael@0: return text + len; michael@0: } michael@0: } michael@0: michael@0: static inline const uint8_t * michael@0: hb_utf_prev (const uint8_t *text, michael@0: const uint8_t *start, michael@0: hb_codepoint_t *unicode) michael@0: { michael@0: const uint8_t *end = text--; michael@0: while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) michael@0: text--; michael@0: michael@0: hb_codepoint_t c = *text, mask; michael@0: unsigned int len; michael@0: michael@0: /* TODO check for overlong sequences? */ michael@0: michael@0: HB_UTF8_COMPUTE (c, mask, len); michael@0: if (unlikely (!len || (unsigned int) (end - text) != len)) { michael@0: *unicode = -1; michael@0: return end - 1; michael@0: } else { michael@0: hb_codepoint_t result; michael@0: unsigned int i; michael@0: result = c & mask; michael@0: for (i = 1; i < len; i++) michael@0: { michael@0: result <<= 6; michael@0: result |= (text[i] & 0x3f); michael@0: } michael@0: *unicode = result; michael@0: return text; michael@0: } michael@0: } michael@0: michael@0: michael@0: static inline unsigned int michael@0: hb_utf_strlen (const uint8_t *text) michael@0: { michael@0: return strlen ((const char *) text); michael@0: } michael@0: michael@0: michael@0: /* UTF-16 */ michael@0: michael@0: static inline const uint16_t * michael@0: hb_utf_next (const uint16_t *text, michael@0: const uint16_t *end, michael@0: hb_codepoint_t *unicode) michael@0: { michael@0: hb_codepoint_t c = *text++; michael@0: michael@0: if (unlikely (hb_in_range (c, 0xd800, 0xdbff))) michael@0: { michael@0: /* high surrogate */ michael@0: hb_codepoint_t l; michael@0: if (text < end && ((l = *text), likely (hb_in_range (l, 0xdc00, 0xdfff)))) michael@0: { michael@0: /* low surrogate */ michael@0: *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00); michael@0: text++; michael@0: } else michael@0: *unicode = -1; michael@0: } else michael@0: *unicode = c; michael@0: michael@0: return text; michael@0: } michael@0: michael@0: static inline const uint16_t * michael@0: hb_utf_prev (const uint16_t *text, michael@0: const uint16_t *start, michael@0: hb_codepoint_t *unicode) michael@0: { michael@0: hb_codepoint_t c = *--text; michael@0: michael@0: if (unlikely (hb_in_range (c, 0xdc00, 0xdfff))) michael@0: { michael@0: /* low surrogate */ michael@0: hb_codepoint_t h; michael@0: if (start < text && ((h = *(text - 1)), likely (hb_in_range (h, 0xd800, 0xdbff)))) michael@0: { michael@0: /* high surrogate */ michael@0: *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00); michael@0: text--; michael@0: } else michael@0: *unicode = -1; michael@0: } else michael@0: *unicode = c; michael@0: michael@0: return text; michael@0: } michael@0: michael@0: michael@0: static inline unsigned int michael@0: hb_utf_strlen (const uint16_t *text) michael@0: { michael@0: unsigned int l = 0; michael@0: while (*text++) l++; michael@0: return l; michael@0: } michael@0: michael@0: michael@0: /* UTF-32 */ michael@0: michael@0: static inline const uint32_t * michael@0: hb_utf_next (const uint32_t *text, michael@0: const uint32_t *end HB_UNUSED, michael@0: hb_codepoint_t *unicode) michael@0: { michael@0: *unicode = *text++; michael@0: return text; michael@0: } michael@0: michael@0: static inline const uint32_t * michael@0: hb_utf_prev (const uint32_t *text, michael@0: const uint32_t *start HB_UNUSED, michael@0: hb_codepoint_t *unicode) michael@0: { michael@0: *unicode = *--text; michael@0: return text; michael@0: } michael@0: michael@0: static inline unsigned int michael@0: hb_utf_strlen (const uint32_t *text) michael@0: { michael@0: unsigned int l = 0; michael@0: while (*text++) l++; michael@0: return l; michael@0: } michael@0: michael@0: michael@0: #endif /* HB_UTF_PRIVATE_HH */