Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* |
michael@0 | 2 | * Copyright © 2011,2012 Google, Inc. |
michael@0 | 3 | * |
michael@0 | 4 | * This is part of HarfBuzz, a text shaping library. |
michael@0 | 5 | * |
michael@0 | 6 | * Permission is hereby granted, without written agreement and without |
michael@0 | 7 | * license or royalty fees, to use, copy, modify, and distribute this |
michael@0 | 8 | * software and its documentation for any purpose, provided that the |
michael@0 | 9 | * above copyright notice and the following two paragraphs appear in |
michael@0 | 10 | * all copies of this software. |
michael@0 | 11 | * |
michael@0 | 12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
michael@0 | 13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
michael@0 | 14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
michael@0 | 15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
michael@0 | 16 | * DAMAGE. |
michael@0 | 17 | * |
michael@0 | 18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
michael@0 | 19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
michael@0 | 20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
michael@0 | 21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
michael@0 | 22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
michael@0 | 23 | * |
michael@0 | 24 | * Google Author(s): Behdad Esfahbod |
michael@0 | 25 | */ |
michael@0 | 26 | |
michael@0 | 27 | #ifndef HB_UTF_PRIVATE_HH |
michael@0 | 28 | #define HB_UTF_PRIVATE_HH |
michael@0 | 29 | |
michael@0 | 30 | #include "hb-private.hh" |
michael@0 | 31 | |
michael@0 | 32 | |
michael@0 | 33 | /* UTF-8 */ |
michael@0 | 34 | |
/*
 * HB_UTF8_COMPUTE:
 * Classify the lead byte of a UTF-8 sequence.
 *
 * Char: value of the lead byte (evaluated more than once; pass a
 *       side-effect-free expression).
 * Mask: lvalue receiving the bit mask that extracts the payload bits of
 *       the lead byte (0 when the byte is not a valid lead byte).
 * Len:  lvalue receiving the total sequence length in bytes (1..4), or 0
 *       when Char is a continuation byte or otherwise not a valid lead.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * e.g. as the body of an unbraced if/else.  Invoke with a trailing ';'.
 */
#define HB_UTF8_COMPUTE(Char, Mask, Len) \
  do { \
    if ((Char) < 128) { (Len) = 1; (Mask) = 0x7f; } \
    else if (((Char) & 0xe0) == 0xc0) { (Len) = 2; (Mask) = 0x1f; } \
    else if (((Char) & 0xf0) == 0xe0) { (Len) = 3; (Mask) = 0x0f; } \
    else if (((Char) & 0xf8) == 0xf0) { (Len) = 4; (Mask) = 0x07; } \
    else { (Len) = 0; (Mask) = 0; } \
  } while (0)
michael@0 | 41 | |
michael@0 | 42 | static inline const uint8_t * |
michael@0 | 43 | hb_utf_next (const uint8_t *text, |
michael@0 | 44 | const uint8_t *end, |
michael@0 | 45 | hb_codepoint_t *unicode) |
michael@0 | 46 | { |
michael@0 | 47 | hb_codepoint_t c = *text, mask; |
michael@0 | 48 | unsigned int len; |
michael@0 | 49 | |
michael@0 | 50 | /* TODO check for overlong sequences? */ |
michael@0 | 51 | |
michael@0 | 52 | HB_UTF8_COMPUTE (c, mask, len); |
michael@0 | 53 | if (unlikely (!len || (unsigned int) (end - text) < len)) { |
michael@0 | 54 | *unicode = -1; |
michael@0 | 55 | return text + 1; |
michael@0 | 56 | } else { |
michael@0 | 57 | hb_codepoint_t result; |
michael@0 | 58 | unsigned int i; |
michael@0 | 59 | result = c & mask; |
michael@0 | 60 | for (i = 1; i < len; i++) |
michael@0 | 61 | { |
michael@0 | 62 | if (unlikely ((text[i] & 0xc0) != 0x80)) |
michael@0 | 63 | { |
michael@0 | 64 | *unicode = -1; |
michael@0 | 65 | return text + 1; |
michael@0 | 66 | } |
michael@0 | 67 | result <<= 6; |
michael@0 | 68 | result |= (text[i] & 0x3f); |
michael@0 | 69 | } |
michael@0 | 70 | *unicode = result; |
michael@0 | 71 | return text + len; |
michael@0 | 72 | } |
michael@0 | 73 | } |
michael@0 | 74 | |
michael@0 | 75 | static inline const uint8_t * |
michael@0 | 76 | hb_utf_prev (const uint8_t *text, |
michael@0 | 77 | const uint8_t *start, |
michael@0 | 78 | hb_codepoint_t *unicode) |
michael@0 | 79 | { |
michael@0 | 80 | const uint8_t *end = text--; |
michael@0 | 81 | while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) |
michael@0 | 82 | text--; |
michael@0 | 83 | |
michael@0 | 84 | hb_codepoint_t c = *text, mask; |
michael@0 | 85 | unsigned int len; |
michael@0 | 86 | |
michael@0 | 87 | /* TODO check for overlong sequences? */ |
michael@0 | 88 | |
michael@0 | 89 | HB_UTF8_COMPUTE (c, mask, len); |
michael@0 | 90 | if (unlikely (!len || (unsigned int) (end - text) != len)) { |
michael@0 | 91 | *unicode = -1; |
michael@0 | 92 | return end - 1; |
michael@0 | 93 | } else { |
michael@0 | 94 | hb_codepoint_t result; |
michael@0 | 95 | unsigned int i; |
michael@0 | 96 | result = c & mask; |
michael@0 | 97 | for (i = 1; i < len; i++) |
michael@0 | 98 | { |
michael@0 | 99 | result <<= 6; |
michael@0 | 100 | result |= (text[i] & 0x3f); |
michael@0 | 101 | } |
michael@0 | 102 | *unicode = result; |
michael@0 | 103 | return text; |
michael@0 | 104 | } |
michael@0 | 105 | } |
michael@0 | 106 | |
michael@0 | 107 | |
/* Number of bytes (UTF-8 code units) in the NUL-terminated string @text,
 * not counting the terminator. */
static inline unsigned int
hb_utf_strlen (const uint8_t *text)
{
  const char *bytes = (const char *) text;
  return (unsigned int) strlen (bytes);
}
michael@0 | 113 | |
michael@0 | 114 | |
michael@0 | 115 | /* UTF-16 */ |
michael@0 | 116 | |
michael@0 | 117 | static inline const uint16_t * |
michael@0 | 118 | hb_utf_next (const uint16_t *text, |
michael@0 | 119 | const uint16_t *end, |
michael@0 | 120 | hb_codepoint_t *unicode) |
michael@0 | 121 | { |
michael@0 | 122 | hb_codepoint_t c = *text++; |
michael@0 | 123 | |
michael@0 | 124 | if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff))) |
michael@0 | 125 | { |
michael@0 | 126 | /* high surrogate */ |
michael@0 | 127 | hb_codepoint_t l; |
michael@0 | 128 | if (text < end && ((l = *text), likely (hb_in_range<hb_codepoint_t> (l, 0xdc00, 0xdfff)))) |
michael@0 | 129 | { |
michael@0 | 130 | /* low surrogate */ |
michael@0 | 131 | *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00); |
michael@0 | 132 | text++; |
michael@0 | 133 | } else |
michael@0 | 134 | *unicode = -1; |
michael@0 | 135 | } else |
michael@0 | 136 | *unicode = c; |
michael@0 | 137 | |
michael@0 | 138 | return text; |
michael@0 | 139 | } |
michael@0 | 140 | |
michael@0 | 141 | static inline const uint16_t * |
michael@0 | 142 | hb_utf_prev (const uint16_t *text, |
michael@0 | 143 | const uint16_t *start, |
michael@0 | 144 | hb_codepoint_t *unicode) |
michael@0 | 145 | { |
michael@0 | 146 | hb_codepoint_t c = *--text; |
michael@0 | 147 | |
michael@0 | 148 | if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff))) |
michael@0 | 149 | { |
michael@0 | 150 | /* low surrogate */ |
michael@0 | 151 | hb_codepoint_t h; |
michael@0 | 152 | if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff)))) |
michael@0 | 153 | { |
michael@0 | 154 | /* high surrogate */ |
michael@0 | 155 | *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00); |
michael@0 | 156 | text--; |
michael@0 | 157 | } else |
michael@0 | 158 | *unicode = -1; |
michael@0 | 159 | } else |
michael@0 | 160 | *unicode = c; |
michael@0 | 161 | |
michael@0 | 162 | return text; |
michael@0 | 163 | } |
michael@0 | 164 | |
michael@0 | 165 | |
/* Number of 16-bit code units in @text before the terminating zero unit. */
static inline unsigned int
hb_utf_strlen (const uint16_t *text)
{
  const uint16_t *cursor = text;
  while (*cursor != 0)
    cursor++;
  return (unsigned int) (cursor - text);
}
michael@0 | 173 | |
michael@0 | 174 | |
michael@0 | 175 | /* UTF-32 */ |
michael@0 | 176 | |
michael@0 | 177 | static inline const uint32_t * |
michael@0 | 178 | hb_utf_next (const uint32_t *text, |
michael@0 | 179 | const uint32_t *end HB_UNUSED, |
michael@0 | 180 | hb_codepoint_t *unicode) |
michael@0 | 181 | { |
michael@0 | 182 | *unicode = *text++; |
michael@0 | 183 | return text; |
michael@0 | 184 | } |
michael@0 | 185 | |
michael@0 | 186 | static inline const uint32_t * |
michael@0 | 187 | hb_utf_prev (const uint32_t *text, |
michael@0 | 188 | const uint32_t *start HB_UNUSED, |
michael@0 | 189 | hb_codepoint_t *unicode) |
michael@0 | 190 | { |
michael@0 | 191 | *unicode = *--text; |
michael@0 | 192 | return text; |
michael@0 | 193 | } |
michael@0 | 194 | |
/* Number of 32-bit code units in @text before the terminating zero unit. */
static inline unsigned int
hb_utf_strlen (const uint32_t *text)
{
  const uint32_t *cursor = text;
  while (*cursor != 0)
    cursor++;
  return (unsigned int) (cursor - text);
}
michael@0 | 202 | |
michael@0 | 203 | |
michael@0 | 204 | #endif /* HB_UTF_PRIVATE_HH */ |