|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 //---------------------------------------------------------------------- |
|
7 // Global functions and data [declaration] |
|
8 #include "nsUnicodeToUTF8.h" |
|
9 |
|
// Invokes the NS_IMPL_ISUPPORTS macro (defined outside this file) for
// nsUnicodeToUTF8, naming nsIUnicodeEncoder as the only interface.
// NOTE(review): presumably this generates the nsISupports boilerplate
// (reference counting and interface lookup) -- confirm against the macro.
10 NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder) |
|
11 |
|
12 //---------------------------------------------------------------------- |
|
13 // nsUnicodeToUTF8 class [implementation] |
|
14 |
|
15 NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t * aSrc, |
|
16 int32_t aSrcLength, |
|
17 int32_t * aDestLength) |
|
18 { |
|
19 // aSrc is interpreted as UTF16, 3 is normally enough. |
|
20 // But when previous buffer only contains part of the surrogate pair, we |
|
21 // need to complete it here. If the first word in following buffer is not |
|
22 // in valid surrogate range, we need to convert the remaining of last buffer |
|
23 // to 3 bytes. |
|
24 *aDestLength = 3*aSrcLength + 3; |
|
25 return NS_OK; |
|
26 } |
|
27 |
|
28 NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t * aSrc, |
|
29 int32_t * aSrcLength, |
|
30 char * aDest, |
|
31 int32_t * aDestLength) |
|
32 { |
|
33 const char16_t * src = aSrc; |
|
34 const char16_t * srcEnd = aSrc + *aSrcLength; |
|
35 char * dest = aDest; |
|
36 int32_t destLen = *aDestLength; |
|
37 uint32_t n; |
|
38 |
|
39 //complete remaining of last conversion |
|
40 if (mHighSurrogate) { |
|
41 if (src < srcEnd) { |
|
42 *aDestLength = 0; |
|
43 return NS_OK_UENC_MOREINPUT; |
|
44 } |
|
45 if (*aDestLength < 4) { |
|
46 *aSrcLength = 0; |
|
47 *aDestLength = 0; |
|
48 return NS_OK_UENC_MOREOUTPUT; |
|
49 } |
|
50 if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair |
|
51 *dest++ = (char)0xef; //replacement character |
|
52 *dest++ = (char)0xbf; |
|
53 *dest++ = (char)0xbd; |
|
54 destLen -= 3; |
|
55 } else { |
|
56 n = ((mHighSurrogate - (char16_t)0xd800) << 10) + |
|
57 (*src - (char16_t)0xdc00) + 0x10000; |
|
58 *dest++ = (char)0xf0 | (n >> 18); |
|
59 *dest++ = (char)0x80 | ((n >> 12) & 0x3f); |
|
60 *dest++ = (char)0x80 | ((n >> 6) & 0x3f); |
|
61 *dest++ = (char)0x80 | (n & 0x3f); |
|
62 ++src; |
|
63 destLen -= 4; |
|
64 } |
|
65 mHighSurrogate = 0; |
|
66 } |
|
67 |
|
68 while (src < srcEnd) { |
|
69 if ( *src <= 0x007f) { |
|
70 if (destLen < 1) |
|
71 goto error_more_output; |
|
72 *dest++ = (char)*src; |
|
73 --destLen; |
|
74 } else if (*src <= 0x07ff) { |
|
75 if (destLen < 2) |
|
76 goto error_more_output; |
|
77 *dest++ = (char)0xc0 | (*src >> 6); |
|
78 *dest++ = (char)0x80 | (*src & 0x003f); |
|
79 destLen -= 2; |
|
80 } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) { |
|
81 if (*src >= (char16_t)0xdc00) { //not a pair |
|
82 if (destLen < 3) |
|
83 goto error_more_output; |
|
84 *dest++ = (char)0xef; //replacement character |
|
85 *dest++ = (char)0xbf; |
|
86 *dest++ = (char)0xbd; |
|
87 destLen -= 3; |
|
88 ++src; |
|
89 continue; |
|
90 } |
|
91 if ((src+1) >= srcEnd) { |
|
92 //we need another surrogate to complete this unicode char |
|
93 mHighSurrogate = *src; |
|
94 *aDestLength = dest - aDest; |
|
95 return NS_OK_UENC_MOREINPUT; |
|
96 } |
|
97 //handle surrogate |
|
98 if (destLen < 4) |
|
99 goto error_more_output; |
|
100 if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair |
|
101 *dest++ = (char)0xef; //replacement character |
|
102 *dest++ = (char)0xbf; |
|
103 *dest++ = (char)0xbd; |
|
104 destLen -= 3; |
|
105 } else { |
|
106 n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000; |
|
107 *dest++ = (char)0xf0 | (n >> 18); |
|
108 *dest++ = (char)0x80 | ((n >> 12) & 0x3f); |
|
109 *dest++ = (char)0x80 | ((n >> 6) & 0x3f); |
|
110 *dest++ = (char)0x80 | (n & 0x3f); |
|
111 destLen -= 4; |
|
112 ++src; |
|
113 } |
|
114 } else { |
|
115 if (destLen < 3) |
|
116 goto error_more_output; |
|
117 //treat rest of the character as BMP |
|
118 *dest++ = (char)0xe0 | (*src >> 12); |
|
119 *dest++ = (char)0x80 | ((*src >> 6) & 0x003f); |
|
120 *dest++ = (char)0x80 | (*src & 0x003f); |
|
121 destLen -= 3; |
|
122 } |
|
123 ++src; |
|
124 } |
|
125 |
|
126 *aDestLength = dest - aDest; |
|
127 return NS_OK; |
|
128 |
|
129 error_more_output: |
|
130 *aSrcLength = src - aSrc; |
|
131 *aDestLength = dest - aDest; |
|
132 return NS_OK_UENC_MOREOUTPUT; |
|
133 } |
|
134 |
|
135 NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength) |
|
136 { |
|
137 char * dest = aDest; |
|
138 |
|
139 if (mHighSurrogate) { |
|
140 if (*aDestLength < 3) { |
|
141 *aDestLength = 0; |
|
142 return NS_OK_UENC_MOREOUTPUT; |
|
143 } |
|
144 *dest++ = (char)0xef; //replacement character |
|
145 *dest++ = (char)0xbf; |
|
146 *dest++ = (char)0xbd; |
|
147 mHighSurrogate = 0; |
|
148 *aDestLength = 3; |
|
149 return NS_OK; |
|
150 } |
|
151 |
|
152 *aDestLength = 0; |
|
153 return NS_OK; |
|
154 } |