|
1 /* |
|
2 * Copyright © 2011,2012 Google, Inc. |
|
3 * |
|
4 * This is part of HarfBuzz, a text shaping library. |
|
5 * |
|
6 * Permission is hereby granted, without written agreement and without |
|
7 * license or royalty fees, to use, copy, modify, and distribute this |
|
8 * software and its documentation for any purpose, provided that the |
|
9 * above copyright notice and the following two paragraphs appear in |
|
10 * all copies of this software. |
|
11 * |
|
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
|
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
|
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
|
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
|
16 * DAMAGE. |
|
17 * |
|
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
|
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
|
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
|
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
|
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
|
23 * |
|
24 * Google Author(s): Behdad Esfahbod |
|
25 */ |
|
26 |
|
27 #ifndef HB_UTF_PRIVATE_HH |
|
28 #define HB_UTF_PRIVATE_HH |
|
29 |
|
30 #include "hb-private.hh" |
|
31 |
|
32 |
|
33 /* UTF-8 */ |
|
34 |
|
/*
 * Classifies Char as the lead byte of a UTF-8 sequence.
 *
 * On return Len holds the total sequence length in bytes (1..4) and Mask
 * selects the payload bits carried by the lead byte.  For a byte that cannot
 * start a sequence (a continuation byte 0x80..0xBF, or 0xF8 and above) Len
 * is set to 0 and Mask to 0, so neither output is ever left uninitialized.
 *
 * Char is evaluated more than once: pass a side-effect-free expression.
 * Mask and Len must be assignable lvalues.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon is exactly one statement and can sit in an unbraced if/else.
 */
#define HB_UTF8_COMPUTE(Char, Mask, Len) \
  do { \
    if ((Char) < 128) { (Len) = 1; (Mask) = 0x7f; } \
    else if (((Char) & 0xe0) == 0xc0) { (Len) = 2; (Mask) = 0x1f; } \
    else if (((Char) & 0xf0) == 0xe0) { (Len) = 3; (Mask) = 0x0f; } \
    else if (((Char) & 0xf8) == 0xf0) { (Len) = 4; (Mask) = 0x07; } \
    else { (Len) = 0; (Mask) = 0; } \
  } while (0)
|
41 |
|
42 static inline const uint8_t * |
|
43 hb_utf_next (const uint8_t *text, |
|
44 const uint8_t *end, |
|
45 hb_codepoint_t *unicode) |
|
46 { |
|
47 hb_codepoint_t c = *text, mask; |
|
48 unsigned int len; |
|
49 |
|
50 /* TODO check for overlong sequences? */ |
|
51 |
|
52 HB_UTF8_COMPUTE (c, mask, len); |
|
53 if (unlikely (!len || (unsigned int) (end - text) < len)) { |
|
54 *unicode = -1; |
|
55 return text + 1; |
|
56 } else { |
|
57 hb_codepoint_t result; |
|
58 unsigned int i; |
|
59 result = c & mask; |
|
60 for (i = 1; i < len; i++) |
|
61 { |
|
62 if (unlikely ((text[i] & 0xc0) != 0x80)) |
|
63 { |
|
64 *unicode = -1; |
|
65 return text + 1; |
|
66 } |
|
67 result <<= 6; |
|
68 result |= (text[i] & 0x3f); |
|
69 } |
|
70 *unicode = result; |
|
71 return text + len; |
|
72 } |
|
73 } |
|
74 |
|
75 static inline const uint8_t * |
|
76 hb_utf_prev (const uint8_t *text, |
|
77 const uint8_t *start, |
|
78 hb_codepoint_t *unicode) |
|
79 { |
|
80 const uint8_t *end = text--; |
|
81 while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) |
|
82 text--; |
|
83 |
|
84 hb_codepoint_t c = *text, mask; |
|
85 unsigned int len; |
|
86 |
|
87 /* TODO check for overlong sequences? */ |
|
88 |
|
89 HB_UTF8_COMPUTE (c, mask, len); |
|
90 if (unlikely (!len || (unsigned int) (end - text) != len)) { |
|
91 *unicode = -1; |
|
92 return end - 1; |
|
93 } else { |
|
94 hb_codepoint_t result; |
|
95 unsigned int i; |
|
96 result = c & mask; |
|
97 for (i = 1; i < len; i++) |
|
98 { |
|
99 result <<= 6; |
|
100 result |= (text[i] & 0x3f); |
|
101 } |
|
102 *unicode = result; |
|
103 return text; |
|
104 } |
|
105 } |
|
106 |
|
107 |
|
/*
 * Returns the number of bytes in the NUL-terminated UTF-8 string text,
 * not counting the terminator.  The count is in code units (bytes), not
 * in code points.
 */
static inline unsigned int
hb_utf_strlen (const uint8_t *text)
{
  const char *bytes = (const char *) text;
  size_t count = strlen (bytes);
  return (unsigned int) count;
}
|
113 |
|
114 |
|
115 /* UTF-16 */ |
|
116 |
|
117 static inline const uint16_t * |
|
118 hb_utf_next (const uint16_t *text, |
|
119 const uint16_t *end, |
|
120 hb_codepoint_t *unicode) |
|
121 { |
|
122 hb_codepoint_t c = *text++; |
|
123 |
|
124 if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff))) |
|
125 { |
|
126 /* high surrogate */ |
|
127 hb_codepoint_t l; |
|
128 if (text < end && ((l = *text), likely (hb_in_range<hb_codepoint_t> (l, 0xdc00, 0xdfff)))) |
|
129 { |
|
130 /* low surrogate */ |
|
131 *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00); |
|
132 text++; |
|
133 } else |
|
134 *unicode = -1; |
|
135 } else |
|
136 *unicode = c; |
|
137 |
|
138 return text; |
|
139 } |
|
140 |
|
141 static inline const uint16_t * |
|
142 hb_utf_prev (const uint16_t *text, |
|
143 const uint16_t *start, |
|
144 hb_codepoint_t *unicode) |
|
145 { |
|
146 hb_codepoint_t c = *--text; |
|
147 |
|
148 if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff))) |
|
149 { |
|
150 /* low surrogate */ |
|
151 hb_codepoint_t h; |
|
152 if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff)))) |
|
153 { |
|
154 /* high surrogate */ |
|
155 *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00); |
|
156 text--; |
|
157 } else |
|
158 *unicode = -1; |
|
159 } else |
|
160 *unicode = c; |
|
161 |
|
162 return text; |
|
163 } |
|
164 |
|
165 |
|
/*
 * Returns the number of 16-bit code units in text before the first zero
 * unit.  The count is in code units, so a surrogate pair counts as two.
 */
static inline unsigned int
hb_utf_strlen (const uint16_t *text)
{
  const uint16_t *cursor = text;

  while (*cursor != 0)
    cursor++;

  return (unsigned int) (cursor - text);
}
|
173 |
|
174 |
|
175 /* UTF-32 */ |
|
176 |
|
177 static inline const uint32_t * |
|
178 hb_utf_next (const uint32_t *text, |
|
179 const uint32_t *end HB_UNUSED, |
|
180 hb_codepoint_t *unicode) |
|
181 { |
|
182 *unicode = *text++; |
|
183 return text; |
|
184 } |
|
185 |
|
186 static inline const uint32_t * |
|
187 hb_utf_prev (const uint32_t *text, |
|
188 const uint32_t *start HB_UNUSED, |
|
189 hb_codepoint_t *unicode) |
|
190 { |
|
191 *unicode = *--text; |
|
192 return text; |
|
193 } |
|
194 |
|
/*
 * Returns the number of 32-bit code units in text before the first zero
 * unit.
 */
static inline unsigned int
hb_utf_strlen (const uint32_t *text)
{
  const uint32_t *cursor = text;

  while (*cursor != 0)
    cursor++;

  return (unsigned int) (cursor - text);
}
|
202 |
|
203 |
|
204 #endif /* HB_UTF_PRIVATE_HH */ |