1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/uconv/ucvcn/nsUnicodeToGBK.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,310 @@ 1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + /** 1.9 + * A character set converter from Unicode to GBK. 1.10 + * 1.11 + * 1.12 + * @created 08/Sept/1999 1.13 + * @author Yueheng Xu, Yueheng.Xu@intel.com 1.14 + * Revision History 1.15 + * 04/Oct/1999. Yueheng Xu: used table gUnicodeToGBKTable[0x5200] to make 1.16 + * Unicode to GB mapping fast 1.17 + */ 1.18 + 1.19 +#include "nsUnicodeToGBK.h" 1.20 +#include "gbku.h" 1.21 +#include "uconvutil.h" 1.22 +#include "nsCharTraits.h" 1.23 + 1.24 +//------------------------------------------------------------- 1.25 +// Global table initialization function defined in gbku.h 1.26 +//------------------------------------------------------------- 1.27 + 1.28 +//----------------------------------------------------------------------- 1.29 +// Private class used by nsUnicodeToGB18030 and nsUnicodeToGB18030Font0 1.30 +// nsUnicodeToGB18030Uniq2Bytes 1.31 +//----------------------------------------------------------------------- 1.32 +static const uint16_t g_uf_gb18030_2bytes[] = { 1.33 +#include "gb18030uniq2b.uf" 1.34 +}; 1.35 +class nsUnicodeToGB18030Uniq2Bytes : public nsTableEncoderSupport 1.36 +{ 1.37 +public: 1.38 + nsUnicodeToGB18030Uniq2Bytes() 1.39 + : nsTableEncoderSupport(u2BytesCharset, 1.40 + (uMappingTable*) &g_uf_gb18030_2bytes, 2) {} 1.41 +protected: 1.42 +}; 1.43 +//----------------------------------------------------------------------- 1.44 +// Private class used by nsUnicodeToGB18030 1.45 +// nsUnicodeTo4BytesGB18030 1.46 +//----------------------------------------------------------------------- 1.47 +static const uint16_t g_uf_gb18030_4bytes[] = { 1.48 +#include "gb180304bytes.uf" 1.49 +}; 1.50 +class nsUnicodeTo4BytesGB18030 : public nsTableEncoderSupport 1.51 +{ 1.52 +public: 1.53 + nsUnicodeTo4BytesGB18030() 1.54 + : nsTableEncoderSupport(u4BytesGB18030Charset, 1.55 + (uMappingTable*) &g_uf_gb18030_4bytes, 4) {} 1.56 +protected: 1.57 +}; 1.58 +//----------------------------------------------------------------------- 1.59 +// Private class used by nsUnicodeToGBK 1.60 +// nsUnicodeToGBKUniq2Bytes 1.61 +//----------------------------------------------------------------------- 1.62 +static const uint16_t g_uf_gbk_2bytes[] = { 1.63 +#include "gbkuniq2b.uf" 1.64 +}; 1.65 +class nsUnicodeToGBKUniq2Bytes : public nsTableEncoderSupport 1.66 +{ 1.67 +public: 1.68 + nsUnicodeToGBKUniq2Bytes() 1.69 + : nsTableEncoderSupport(u2BytesCharset, 1.70 + (uMappingTable*) &g_uf_gbk_2bytes, 2) {} 1.71 +protected: 1.72 +}; 1.73 +//----------------------------------------------------------------------- 1.74 +// nsUnicodeToGB18030 1.75 +//----------------------------------------------------------------------- 1.76 +void nsUnicodeToGB18030::CreateExtensionEncoder() 1.77 +{ 1.78 + mExtensionEncoder = new nsUnicodeToGB18030Uniq2Bytes(); 1.79 +} 1.80 +void nsUnicodeToGB18030::Create4BytesEncoder() 1.81 +{ 1.82 + m4BytesEncoder = new nsUnicodeTo4BytesGB18030(); 1.83 +} 1.84 + 1.85 +bool nsUnicodeToGB18030::EncodeSurrogate( 1.86 + char16_t aSurrogateHigh, 1.87 + char16_t aSurrogateLow, 1.88 + char* aOut) 1.89 +{ 1.90 + if( NS_IS_HIGH_SURROGATE(aSurrogateHigh) && 1.91 + NS_IS_LOW_SURROGATE(aSurrogateLow) ) 1.92 + { 1.93 + // notice that idx does not include the 0x10000 1.94 + uint32_t idx = ((aSurrogateHigh - (char16_t)0xD800) << 10 ) | 1.95 + (aSurrogateLow - (char16_t) 0xDC00); 1.96 + 1.97 + unsigned char *out = (unsigned char*) aOut; 1.98 + // notice this is from 0x90 for supplment planes 1.99 + out[0] = (idx / (10*126*10)) + 0x90; 1.100 + idx %= (10*126*10); 1.101 + out[1] = (idx / (10*126)) + 0x30; 1.102 + idx %= (10*126); 1.103 + out[2] = (idx / (10)) + 0x81; 1.104 + out[3] = (idx % 10) + 0x30; 1.105 + return true; 1.106 + } 1.107 + return false; 1.108 +} 1.109 + 1.110 +//---------------------------------------------------------------------- 1.111 +// Class nsUnicodeToGBK [implementation] 1.112 + 1.113 +nsUnicodeToGBK::nsUnicodeToGBK(uint32_t aMaxLength) : 1.114 + nsEncoderSupport(aMaxLength) 1.115 +{ 1.116 + mExtensionEncoder = nullptr; 1.117 + m4BytesEncoder = nullptr; 1.118 + mSurrogateHigh = 0; 1.119 +} 1.120 +void nsUnicodeToGBK::CreateExtensionEncoder() 1.121 +{ 1.122 + mExtensionEncoder = new nsUnicodeToGBKUniq2Bytes(); 1.123 +} 1.124 +void nsUnicodeToGBK::Create4BytesEncoder() 1.125 +{ 1.126 + m4BytesEncoder = nullptr; 1.127 +} 1.128 +bool nsUnicodeToGBK::TryExtensionEncoder( 1.129 + char16_t aChar, 1.130 + char* aOut, 1.131 + int32_t *aOutLen 1.132 +) 1.133 +{ 1.134 + if( NS_IS_HIGH_SURROGATE(aChar) || 1.135 + NS_IS_LOW_SURROGATE(aChar) ) 1.136 + { 1.137 + // performance tune for surrogate characters 1.138 + return false; 1.139 + } 1.140 + if(! mExtensionEncoder ) 1.141 + CreateExtensionEncoder(); 1.142 + if(mExtensionEncoder) 1.143 + { 1.144 + int32_t len = 1; 1.145 + nsresult res = NS_OK; 1.146 + res = mExtensionEncoder->Convert(&aChar, &len, aOut, aOutLen); 1.147 + if(NS_SUCCEEDED(res) && (*aOutLen > 0)) 1.148 + return true; 1.149 + } 1.150 + return false; 1.151 +} 1.152 + 1.153 +bool nsUnicodeToGBK::Try4BytesEncoder( 1.154 + char16_t aChar, 1.155 + char* aOut, 1.156 + int32_t *aOutLen 1.157 +) 1.158 +{ 1.159 + if( NS_IS_HIGH_SURROGATE(aChar) || 1.160 + NS_IS_LOW_SURROGATE(aChar) ) 1.161 + { 1.162 + // performance tune for surrogate characters 1.163 + return false; 1.164 + } 1.165 + if(! m4BytesEncoder ) 1.166 + Create4BytesEncoder(); 1.167 + if(m4BytesEncoder) 1.168 + { 1.169 + int32_t len = 1; 1.170 + nsresult res = NS_OK; 1.171 + res = m4BytesEncoder->Convert(&aChar, &len, aOut, aOutLen); 1.172 + NS_ASSERTION(NS_FAILED(res) || ((1 == len) && (4 == *aOutLen)), 1.173 + "unexpect conversion length"); 1.174 + if(NS_SUCCEEDED(res) && (*aOutLen > 0)) 1.175 + return true; 1.176 + } 1.177 + return false; 1.178 +} 1.179 +bool nsUnicodeToGBK::EncodeSurrogate( 1.180 + char16_t aSurrogateHigh, 1.181 + char16_t aSurrogateLow, 1.182 + char* aOut) 1.183 +{ 1.184 + return false; // GBK cannot encode Surrogate, let the subclass encode it. 1.185 +} 1.186 + 1.187 +NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff( 1.188 + const char16_t * aSrc, 1.189 + int32_t * aSrcLength, 1.190 + char * aDest, 1.191 + int32_t * aDestLength) 1.192 +{ 1.193 + int32_t iSrcLength = 0; 1.194 + int32_t iDestLength = 0; 1.195 + char16_t unicode; 1.196 + nsresult res = NS_OK; 1.197 + while (iSrcLength < *aSrcLength ) 1.198 + { 1.199 + unicode = *aSrc; 1.200 + //if unicode's hi byte has something, it is not ASCII, must be a GB 1.201 + if(IS_ASCII(unicode)) 1.202 + { 1.203 + // this is an ASCII 1.204 + *aDest = CAST_UNICHAR_TO_CHAR(*aSrc); 1.205 + aDest++; // increment 1 byte 1.206 + iDestLength +=1; 1.207 + } else { 1.208 + char byte1, byte2; 1.209 + if(mUtil.UnicodeToGBKChar( unicode, false, &byte1, &byte2)) 1.210 + { 1.211 + // make sure we still have 2 bytes for output first 1.212 + if(iDestLength+2 > *aDestLength) 1.213 + { 1.214 + res = NS_OK_UENC_MOREOUTPUT; 1.215 + break; 1.216 + } 1.217 + aDest[0] = byte1; 1.218 + aDest[1] = byte2; 1.219 + aDest += 2; // increment 2 bytes 1.220 + iDestLength +=2; 1.221 + } else { 1.222 + int32_t aOutLen = 2; 1.223 + // make sure we still have 2 bytes for output first 1.224 + if(iDestLength+2 > *aDestLength) 1.225 + { 1.226 + res = NS_OK_UENC_MOREOUTPUT; 1.227 + break; 1.228 + } 1.229 + // we cannot map in the common mapping. Let's try to 1.230 + // call the delegated 2 byte converter for the gbk or gb18030 1.231 + // unique 2 byte mapping 1.232 + if(TryExtensionEncoder(unicode, aDest, &aOutLen)) 1.233 + { 1.234 + iDestLength += aOutLen; 1.235 + aDest += aOutLen; 1.236 + } else { 1.237 + // make sure we still have 4 bytes for output first 1.238 + if(iDestLength+4 > *aDestLength) 1.239 + { 1.240 + res = NS_OK_UENC_MOREOUTPUT; 1.241 + break; 1.242 + } 1.243 + // we still cannot map. Let's try to 1.244 + // call the delegated GB18030 4 byte converter 1.245 + aOutLen = 4; 1.246 + if( NS_IS_HIGH_SURROGATE(unicode) ) 1.247 + { 1.248 + if((iSrcLength+1) < *aSrcLength ) { 1.249 + if(EncodeSurrogate(aSrc[0],aSrc[1], aDest)) { 1.250 + // since we got a surrogate pair, we need to increment src. 1.251 + iSrcLength++ ; 1.252 + aSrc++; 1.253 + iDestLength += aOutLen; 1.254 + aDest += aOutLen; 1.255 + } else { 1.256 + // only get a high surrogate, but not a low surrogate 1.257 + res = NS_ERROR_UENC_NOMAPPING; 1.258 + iSrcLength++; // include length of the unmapped character 1.259 + break; 1.260 + } 1.261 + } else { 1.262 + mSurrogateHigh = aSrc[0]; 1.263 + break; // this will go to afterwhileloop 1.264 + } 1.265 + } else { 1.266 + if( NS_IS_LOW_SURROGATE(unicode) ) 1.267 + { 1.268 + if(NS_IS_HIGH_SURROGATE(mSurrogateHigh)) { 1.269 + if(EncodeSurrogate(mSurrogateHigh, aSrc[0], aDest)) { 1.270 + iDestLength += aOutLen; 1.271 + aDest += aOutLen; 1.272 + } else { 1.273 + // only get a high surrogate, but not a low surrogate 1.274 + res = NS_ERROR_UENC_NOMAPPING; 1.275 + iSrcLength++; // include length of the unmapped character 1.276 + break; 1.277 + } 1.278 + } else { 1.279 + // only get a low surrogate, but not a low surrogate 1.280 + res = NS_ERROR_UENC_NOMAPPING; 1.281 + iSrcLength++; // include length of the unmapped character 1.282 + break; 1.283 + } 1.284 + } else { 1.285 + if(Try4BytesEncoder(unicode, aDest, &aOutLen)) 1.286 + { 1.287 + NS_ASSERTION((aOutLen == 4), "we should always generate 4 bytes here"); 1.288 + iDestLength += aOutLen; 1.289 + aDest += aOutLen; 1.290 + } else { 1.291 + res = NS_ERROR_UENC_NOMAPPING; 1.292 + iSrcLength++; // include length of the unmapped character 1.293 + break; 1.294 + } 1.295 + } 1.296 + } 1.297 + } 1.298 + } 1.299 + } 1.300 + iSrcLength++ ; // Each unicode char just count as one in char16_t string; 1.301 + mSurrogateHigh = 0; 1.302 + aSrc++; 1.303 + if ( iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength) ) 1.304 + { 1.305 + res = NS_OK_UENC_MOREOUTPUT; 1.306 + break; 1.307 + } 1.308 + } 1.309 +//afterwhileloop: 1.310 + *aDestLength = iDestLength; 1.311 + *aSrcLength = iSrcLength; 1.312 + return res; 1.313 +}