Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | /** |
michael@0 | 6 | * A character set converter from GBK to Unicode. |
michael@0 | 7 | * |
michael@0 | 8 | * |
michael@0 | 9 | * @created 07/Sept/1999 |
michael@0 | 10 | * @author Yueheng Xu, Yueheng.Xu@intel.com |
michael@0 | 11 | */ |
michael@0 | 12 | |
michael@0 | 13 | #include "nsGBKToUnicode.h" |
michael@0 | 14 | #include "gbku.h" |
michael@0 | 15 | |
michael@0 | 16 | |
michael@0 | 17 | //------------------------------------------------------------ |
michael@0 | 18 | // nsGBKUnique2BytesToUnicode |
michael@0 | 19 | //------------------------------------------------------------ |
michael@0 | 20 | class nsGBKUnique2BytesToUnicode : public nsTableDecoderSupport |
michael@0 | 21 | { |
michael@0 | 22 | public: |
michael@0 | 23 | nsGBKUnique2BytesToUnicode(); |
michael@0 | 24 | virtual ~nsGBKUnique2BytesToUnicode() |
michael@0 | 25 | { } |
michael@0 | 26 | protected: |
michael@0 | 27 | }; |
michael@0 | 28 | |
michael@0 | 29 | static const uint16_t g_utGBKUnique2Bytes[] = { |
michael@0 | 30 | #include "gbkuniq2b.ut" |
michael@0 | 31 | }; |
michael@0 | 32 | nsGBKUnique2BytesToUnicode::nsGBKUnique2BytesToUnicode() |
michael@0 | 33 | : nsTableDecoderSupport(u2BytesCharset, nullptr, |
michael@0 | 34 | (uMappingTable*) &g_utGBKUnique2Bytes, 1) |
michael@0 | 35 | { |
michael@0 | 36 | } |
michael@0 | 37 | |
michael@0 | 38 | //------------------------------------------------------------ |
michael@0 | 39 | // nsGB18030Unique2BytesToUnicode |
michael@0 | 40 | //------------------------------------------------------------ |
michael@0 | 41 | class nsGB18030Unique2BytesToUnicode : public nsTableDecoderSupport |
michael@0 | 42 | { |
michael@0 | 43 | public: |
michael@0 | 44 | nsGB18030Unique2BytesToUnicode(); |
michael@0 | 45 | virtual ~nsGB18030Unique2BytesToUnicode() |
michael@0 | 46 | { } |
michael@0 | 47 | protected: |
michael@0 | 48 | }; |
michael@0 | 49 | |
michael@0 | 50 | static const uint16_t g_utGB18030Unique2Bytes[] = { |
michael@0 | 51 | #include "gb18030uniq2b.ut" |
michael@0 | 52 | }; |
michael@0 | 53 | nsGB18030Unique2BytesToUnicode::nsGB18030Unique2BytesToUnicode() |
michael@0 | 54 | : nsTableDecoderSupport(u2BytesCharset, nullptr, |
michael@0 | 55 | (uMappingTable*) &g_utGB18030Unique2Bytes, 1) |
michael@0 | 56 | { |
michael@0 | 57 | } |
michael@0 | 58 | |
michael@0 | 59 | //------------------------------------------------------------ |
michael@0 | 60 | // nsGB18030Unique4BytesToUnicode |
michael@0 | 61 | //------------------------------------------------------------ |
michael@0 | 62 | class nsGB18030Unique4BytesToUnicode : public nsTableDecoderSupport |
michael@0 | 63 | { |
michael@0 | 64 | public: |
michael@0 | 65 | nsGB18030Unique4BytesToUnicode(); |
michael@0 | 66 | virtual ~nsGB18030Unique4BytesToUnicode() |
michael@0 | 67 | { } |
michael@0 | 68 | protected: |
michael@0 | 69 | }; |
michael@0 | 70 | |
michael@0 | 71 | static const uint16_t g_utGB18030Unique4Bytes[] = { |
michael@0 | 72 | #include "gb180304bytes.ut" |
michael@0 | 73 | }; |
michael@0 | 74 | nsGB18030Unique4BytesToUnicode::nsGB18030Unique4BytesToUnicode() |
michael@0 | 75 | : nsTableDecoderSupport(u4BytesGB18030Charset, nullptr, |
michael@0 | 76 | (uMappingTable*) &g_utGB18030Unique4Bytes, 1) |
michael@0 | 77 | { |
michael@0 | 78 | } |
michael@0 | 79 | |
michael@0 | 80 | |
michael@0 | 81 | //---------------------------------------------------------------------- |
michael@0 | 82 | // Class nsGBKToUnicode [implementation] |
michael@0 | 83 | |
michael@0 | 84 | //---------------------------------------------------------------------- |
michael@0 | 85 | // Subclassing of nsTablesDecoderSupport class [implementation] |
michael@0 | 86 | |
// Byte-class predicates used by the decoder below. Each one tests a single
// source byte against the range(s) allowed at that position.

// Lead byte of any multi-byte sequence: [0x81,0xFE].
#define LEGAL_GBK_MULTIBYTE_FIRST_BYTE(c) (UINT8_IN_RANGE(0x81, (c), 0xFE))

// Lead bytes [0x90,0xFE]: four-byte sequences starting with one of these are
// decoded to a UTF-16 surrogate pair instead of via the four-byte table.
#define FIRST_BYTE_IS_SURROGATE(c) (UINT8_IN_RANGE(0x90, (c), 0xFE))

// Trail byte of a two-byte sequence: [0x40,0x7E] or [0x80,0xFE].
#define LEGAL_GBK_2BYTE_SECOND_BYTE(c) (UINT8_IN_RANGE(0x40, (c), 0x7E)|| UINT8_IN_RANGE(0x80, (c), 0xFE))

// Four-byte pattern: [0x81-0xFE][0x30-0x39][0x81-0xFE][0x30-0x39].
#define LEGAL_GBK_4BYTE_SECOND_BYTE(c) (UINT8_IN_RANGE(0x30, (c), 0x39))
#define LEGAL_GBK_4BYTE_THIRD_BYTE(c) (UINT8_IN_RANGE(0x81, (c), 0xFE))
#define LEGAL_GBK_4BYTE_FORTH_BYTE(c) (UINT8_IN_RANGE(0x30, (c), 0x39))
michael@0 | 99 | |
michael@0 | 100 | NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc, |
michael@0 | 101 | int32_t * aSrcLength, |
michael@0 | 102 | char16_t *aDest, |
michael@0 | 103 | int32_t * aDestLength) |
michael@0 | 104 | { |
michael@0 | 105 | int32_t i=0; |
michael@0 | 106 | int32_t iSrcLength = (*aSrcLength); |
michael@0 | 107 | int32_t iDestlen = 0; |
michael@0 | 108 | nsresult rv=NS_OK; |
michael@0 | 109 | *aSrcLength = 0; |
michael@0 | 110 | |
michael@0 | 111 | for (i=0;i<iSrcLength;i++) |
michael@0 | 112 | { |
michael@0 | 113 | if ( iDestlen >= (*aDestLength) ) |
michael@0 | 114 | { |
michael@0 | 115 | rv = NS_OK_UDEC_MOREOUTPUT; |
michael@0 | 116 | break; |
michael@0 | 117 | } |
michael@0 | 118 | // The valid range for the 1st byte is [0x81,0xFE] |
michael@0 | 119 | if(LEGAL_GBK_MULTIBYTE_FIRST_BYTE(*aSrc)) |
michael@0 | 120 | { |
michael@0 | 121 | if(i+1 >= iSrcLength) |
michael@0 | 122 | { |
michael@0 | 123 | rv = NS_OK_UDEC_MOREINPUT; |
michael@0 | 124 | break; |
michael@0 | 125 | } |
michael@0 | 126 | // To make sure, the second byte has to be checked as well. |
michael@0 | 127 | // In GBK, the second byte range is [0x40,0x7E] and [0x80,0XFE] |
michael@0 | 128 | if(LEGAL_GBK_2BYTE_SECOND_BYTE(aSrc[1])) |
michael@0 | 129 | { |
michael@0 | 130 | // Valid GBK code |
michael@0 | 131 | *aDest = mUtil.GBKCharToUnicode(aSrc[0], aSrc[1]); |
michael@0 | 132 | if(UCS2_NO_MAPPING == *aDest) |
michael@0 | 133 | { |
michael@0 | 134 | // We cannot map in the common mapping, let's call the |
michael@0 | 135 | // delegate 2 byte decoder to decode the gbk or gb18030 unique |
michael@0 | 136 | // 2 byte mapping |
michael@0 | 137 | if(! TryExtensionDecoder(aSrc, aDest)) |
michael@0 | 138 | { |
michael@0 | 139 | *aDest = UCS2_NO_MAPPING; |
michael@0 | 140 | } |
michael@0 | 141 | } |
michael@0 | 142 | aSrc += 2; |
michael@0 | 143 | i++; |
michael@0 | 144 | } |
michael@0 | 145 | else if (LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1])) |
michael@0 | 146 | { |
michael@0 | 147 | // from the first 2 bytes, it looks like a 4 byte GB18030 |
michael@0 | 148 | if(i+3 >= iSrcLength) // make sure we got 4 bytes |
michael@0 | 149 | { |
michael@0 | 150 | rv = NS_OK_UDEC_MOREINPUT; |
michael@0 | 151 | break; |
michael@0 | 152 | } |
michael@0 | 153 | // 4 bytes patten |
michael@0 | 154 | // [0x81-0xfe][0x30-0x39][0x81-0xfe][0x30-0x39] |
michael@0 | 155 | // preset the |
michael@0 | 156 | |
michael@0 | 157 | if (LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]) && |
michael@0 | 158 | LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3])) |
michael@0 | 159 | { |
michael@0 | 160 | if ( ! FIRST_BYTE_IS_SURROGATE(aSrc[0])) |
michael@0 | 161 | { |
michael@0 | 162 | // let's call the delegated 4 byte gb18030 converter to convert it |
michael@0 | 163 | if(! Try4BytesDecoder(aSrc, aDest)) |
michael@0 | 164 | *aDest = UCS2_NO_MAPPING; |
michael@0 | 165 | } else { |
michael@0 | 166 | // let's try supplement mapping |
michael@0 | 167 | if ( (iDestlen+1) < (*aDestLength) ) |
michael@0 | 168 | { |
michael@0 | 169 | if(DecodeToSurrogate(aSrc, aDest)) |
michael@0 | 170 | { |
michael@0 | 171 | // surrogte two char16_t |
michael@0 | 172 | iDestlen++; |
michael@0 | 173 | aDest++; |
michael@0 | 174 | } else { |
michael@0 | 175 | *aDest = UCS2_NO_MAPPING; |
michael@0 | 176 | } |
michael@0 | 177 | } else { |
michael@0 | 178 | if (*aDestLength < 2) { |
michael@0 | 179 | NS_ERROR("insufficient space in output buffer"); |
michael@0 | 180 | *aDest = UCS2_NO_MAPPING; |
michael@0 | 181 | } else { |
michael@0 | 182 | rv = NS_OK_UDEC_MOREOUTPUT; |
michael@0 | 183 | break; |
michael@0 | 184 | } |
michael@0 | 185 | } |
michael@0 | 186 | } |
michael@0 | 187 | aSrc += 4; |
michael@0 | 188 | i += 3; |
michael@0 | 189 | } else { |
michael@0 | 190 | *aDest = UCS2_NO_MAPPING; |
michael@0 | 191 | // If the third and fourth bytes are not in the legal ranges for |
michael@0 | 192 | // a four-byte sequnce, resynchronize on the second byte |
michael@0 | 193 | // (which we know is in the range of LEGAL_GBK_4BYTE_SECOND_BYTE, |
michael@0 | 194 | // 0x30-0x39) |
michael@0 | 195 | aSrc++; |
michael@0 | 196 | } |
michael@0 | 197 | } |
michael@0 | 198 | else if ((uint8_t) aSrc[0] == (uint8_t)0xA0 ) |
michael@0 | 199 | { |
michael@0 | 200 | // stand-alone (not followed by a valid second byte) 0xA0 ! |
michael@0 | 201 | // treat it as valid a la Netscape 4.x |
michael@0 | 202 | *aDest = CAST_CHAR_TO_UNICHAR(*aSrc); |
michael@0 | 203 | aSrc++; |
michael@0 | 204 | } else { |
michael@0 | 205 | // Invalid GBK code point (second byte should be 0x40 or higher) |
michael@0 | 206 | *aDest = UCS2_NO_MAPPING; |
michael@0 | 207 | aSrc++; |
michael@0 | 208 | } |
michael@0 | 209 | } else { |
michael@0 | 210 | if(IS_ASCII(*aSrc)) |
michael@0 | 211 | { |
michael@0 | 212 | // The source is an ASCII |
michael@0 | 213 | *aDest = CAST_CHAR_TO_UNICHAR(*aSrc); |
michael@0 | 214 | aSrc++; |
michael@0 | 215 | } else { |
michael@0 | 216 | if(IS_GBK_EURO(*aSrc)) { |
michael@0 | 217 | *aDest = UCS2_EURO; |
michael@0 | 218 | } else { |
michael@0 | 219 | *aDest = UCS2_NO_MAPPING; |
michael@0 | 220 | } |
michael@0 | 221 | aSrc++; |
michael@0 | 222 | } |
michael@0 | 223 | } |
michael@0 | 224 | iDestlen++; |
michael@0 | 225 | aDest++; |
michael@0 | 226 | *aSrcLength = i+1; |
michael@0 | 227 | } |
michael@0 | 228 | *aDestLength = iDestlen; |
michael@0 | 229 | return rv; |
michael@0 | 230 | } |
michael@0 | 231 | |
michael@0 | 232 | |
michael@0 | 233 | void nsGBKToUnicode::CreateExtensionDecoder() |
michael@0 | 234 | { |
michael@0 | 235 | mExtensionDecoder = new nsGBKUnique2BytesToUnicode(); |
michael@0 | 236 | } |
michael@0 | 237 | void nsGBKToUnicode::Create4BytesDecoder() |
michael@0 | 238 | { |
michael@0 | 239 | m4BytesDecoder = nullptr; |
michael@0 | 240 | } |
michael@0 | 241 | void nsGB18030ToUnicode::CreateExtensionDecoder() |
michael@0 | 242 | { |
michael@0 | 243 | mExtensionDecoder = new nsGB18030Unique2BytesToUnicode(); |
michael@0 | 244 | } |
michael@0 | 245 | void nsGB18030ToUnicode::Create4BytesDecoder() |
michael@0 | 246 | { |
michael@0 | 247 | m4BytesDecoder = new nsGB18030Unique4BytesToUnicode(); |
michael@0 | 248 | } |
michael@0 | 249 | bool nsGB18030ToUnicode::DecodeToSurrogate(const char* aSrc, char16_t* aOut) |
michael@0 | 250 | { |
michael@0 | 251 | NS_ASSERTION(FIRST_BYTE_IS_SURROGATE(aSrc[0]), "illegal first byte"); |
michael@0 | 252 | NS_ASSERTION(LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]), "illegal second byte"); |
michael@0 | 253 | NS_ASSERTION(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]), "illegal third byte"); |
michael@0 | 254 | NS_ASSERTION(LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]), "illegal forth byte"); |
michael@0 | 255 | if(! FIRST_BYTE_IS_SURROGATE(aSrc[0])) |
michael@0 | 256 | return false; |
michael@0 | 257 | if(! LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1])) |
michael@0 | 258 | return false; |
michael@0 | 259 | if(! LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2])) |
michael@0 | 260 | return false; |
michael@0 | 261 | if(! LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3])) |
michael@0 | 262 | return false; |
michael@0 | 263 | |
michael@0 | 264 | uint8_t a1 = (uint8_t) aSrc[0]; |
michael@0 | 265 | uint8_t a2 = (uint8_t) aSrc[1]; |
michael@0 | 266 | uint8_t a3 = (uint8_t) aSrc[2]; |
michael@0 | 267 | uint8_t a4 = (uint8_t) aSrc[3]; |
michael@0 | 268 | a1 -= (uint8_t)0x90; |
michael@0 | 269 | a2 -= (uint8_t)0x30; |
michael@0 | 270 | a3 -= (uint8_t)0x81; |
michael@0 | 271 | a4 -= (uint8_t)0x30; |
michael@0 | 272 | uint32_t idx = (((a1 * 10 + a2 ) * 126 + a3) * 10) + a4; |
michael@0 | 273 | // idx == ucs4Codepoint - 0x10000 |
michael@0 | 274 | if (idx > 0x000FFFFF) |
michael@0 | 275 | return false; |
michael@0 | 276 | |
michael@0 | 277 | *aOut++ = 0xD800 | (idx >> 10); |
michael@0 | 278 | *aOut = 0xDC00 | (0x000003FF & idx); |
michael@0 | 279 | |
michael@0 | 280 | return true; |
michael@0 | 281 | } |
michael@0 | 282 | bool nsGBKToUnicode::TryExtensionDecoder(const char* aSrc, char16_t* aOut) |
michael@0 | 283 | { |
michael@0 | 284 | if(!mExtensionDecoder) |
michael@0 | 285 | CreateExtensionDecoder(); |
michael@0 | 286 | NS_ASSERTION(mExtensionDecoder, "cannot creqte 2 bytes unique converter"); |
michael@0 | 287 | if(mExtensionDecoder) |
michael@0 | 288 | { |
michael@0 | 289 | nsresult res = mExtensionDecoder->Reset(); |
michael@0 | 290 | NS_ASSERTION(NS_SUCCEEDED(res), "2 bytes unique conversoin reset failed"); |
michael@0 | 291 | int32_t len = 2; |
michael@0 | 292 | int32_t dstlen = 1; |
michael@0 | 293 | res = mExtensionDecoder->Convert(aSrc,&len, aOut, &dstlen); |
michael@0 | 294 | NS_ASSERTION(NS_FAILED(res) || ((len==2) && (dstlen == 1)), |
michael@0 | 295 | "some strange conversion result"); |
michael@0 | 296 | // if we failed, we then just use the 0xfffd |
michael@0 | 297 | // therefore, we ignore the res here. |
michael@0 | 298 | if(NS_SUCCEEDED(res)) |
michael@0 | 299 | return true; |
michael@0 | 300 | } |
michael@0 | 301 | return false; |
michael@0 | 302 | } |
michael@0 | 303 | bool nsGBKToUnicode::DecodeToSurrogate(const char* aSrc, char16_t* aOut) |
michael@0 | 304 | { |
michael@0 | 305 | return false; |
michael@0 | 306 | } |
michael@0 | 307 | bool nsGBKToUnicode::Try4BytesDecoder(const char* aSrc, char16_t* aOut) |
michael@0 | 308 | { |
michael@0 | 309 | if(!m4BytesDecoder) |
michael@0 | 310 | Create4BytesDecoder(); |
michael@0 | 311 | if(m4BytesDecoder) |
michael@0 | 312 | { |
michael@0 | 313 | nsresult res = m4BytesDecoder->Reset(); |
michael@0 | 314 | NS_ASSERTION(NS_SUCCEEDED(res), "4 bytes unique conversoin reset failed"); |
michael@0 | 315 | int32_t len = 4; |
michael@0 | 316 | int32_t dstlen = 1; |
michael@0 | 317 | res = m4BytesDecoder->Convert(aSrc,&len, aOut, &dstlen); |
michael@0 | 318 | NS_ASSERTION(NS_FAILED(res) || ((len==4) && (dstlen == 1)), |
michael@0 | 319 | "some strange conversion result"); |
michael@0 | 320 | // if we failed, we then just use the 0xfffd |
michael@0 | 321 | // therefore, we ignore the res here. |
michael@0 | 322 | if(NS_SUCCEEDED(res)) |
michael@0 | 323 | return true; |
michael@0 | 324 | } |
michael@0 | 325 | return false; |
michael@0 | 326 | } |