intl/uconv/ucvcn/nsGBKConvUtil.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/ucvcn/nsGBKConvUtil.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,93 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "nsGBKConvUtil.h"
    1.10 +#include "gbku.h"
    1.11 +#include "nsDebug.h"
    1.12 +#define MAX_GBK_LENGTH 24066 /* number of table cells: 126 lead-byte rows * 191 trail-byte columns = (0xfe-0x80)*(0xfe-0x3f) */
    1.13 +//--------------------------------------------------------------------
    1.14 +// nsGBKConvUtil: static lookup tables shared by the two conversion routines below
    1.15 +//--------------------------------------------------------------------
    1.16 +
    1.17 +static const char16_t gGBKToUnicodeTable[MAX_GBK_LENGTH] = { // decode table, indexed by (lead - 0x81) * 0xBF + (trail - 0x40)
    1.18 +#include "cp936map.h"
    1.19 +};
    1.20 +static const uint16_t gUnicodeToGBKTable[0xA000-0x4e00] = { // encode table, indexed by (code unit - 0x4e00); entry is (lead << 8) | trail, 0 means unmapped
    1.21 +#include "cp936invmap.h"
    1.22 +};
    1.23 +
    1.24 +bool nsGBKConvUtil::UnicodeToGBKChar(
    1.25 +  char16_t aChar, bool aToGL, char* 
    1.26 +  aOutByte1, char* aOutByte2)
    1.27 +{
    1.28 +  bool found=false;
    1.29 +  *aOutByte1 = *aOutByte2 = 0;
    1.30 +  if(UNICHAR_IN_RANGE(0xd800, aChar, 0xdfff))
    1.31 +  {
    1.32 +    // surrogate is not in here
    1.33 +    return false;
    1.34 +  }
    1.35 +  if(UNICHAR_IN_RANGE(0x4e00, aChar, 0x9FFF))
    1.36 +  {
    1.37 +    uint16_t item = gUnicodeToGBKTable[aChar - 0x4e00];
    1.38 +    if(item != 0) 
    1.39 +    {
    1.40 +      *aOutByte1 = item >> 8;
    1.41 +      *aOutByte2 = item & 0x00FF;
    1.42 +      found = true;
    1.43 +    } else {
    1.44 +      return false;
    1.45 +    }
    1.46 +  } else {
    1.47 +    // ugly linear search
    1.48 +    for( int32_t i = 0; i < MAX_GBK_LENGTH; i++ )
    1.49 +    {
    1.50 +      if( aChar == gGBKToUnicodeTable[i])
    1.51 +      {
    1.52 +        *aOutByte1 = (i /  0x00BF + 0x0081) ;
    1.53 +        *aOutByte2 = (i %  0x00BF + 0x0040) ;
    1.54 +        found = true;
    1.55 +        break;
    1.56 +      }
    1.57 +    }
    1.58 +  }
    1.59 +  if(! found)
    1.60 +    return false;
    1.61 +
    1.62 +  if(aToGL) {
    1.63 +    // to GL, we only return if it is in the range 
    1.64 +    if(UINT8_IN_RANGE(0xA1, *aOutByte1, 0xFE) &&
    1.65 +       UINT8_IN_RANGE(0xA1, *aOutByte2, 0xFE))
    1.66 +    {
    1.67 +      // mask them to GL 
    1.68 +      *aOutByte1 &= 0x7F;
    1.69 +      *aOutByte2 &= 0x7F;
    1.70 +    } else {
    1.71 +      // if it does not fit into 0xa1-0xfe 0xa1-0xfe range that mean
    1.72 +      // it is not a GB2312 character, we cannot map to GL 
    1.73 +      *aOutByte1 = 0x00;
    1.74 +      *aOutByte2 = 0x00;
    1.75 +      return false;
    1.76 +    }
    1.77 +  }
    1.78 +  return true;
    1.79 +}
    1.80 +char16_t nsGBKConvUtil::GBKCharToUnicode(char aByte1, char aByte2)
    1.81 +{
    1.82 +  NS_ASSERTION(UINT8_IN_RANGE(0x81,aByte1, 0xFE), "first byte out of range");
    1.83 +  NS_ASSERTION(UINT8_IN_RANGE(0x40,aByte2, 0xFE), "second byte out of range");
    1.84 +
    1.85 +  uint8_t i1 = (uint8_t)aByte1;
    1.86 +  uint8_t i2 = (uint8_t)aByte2;
    1.87 +  uint16_t idx = (i1 - 0x0081) * 0x00bf + i2 - 0x0040 ;
    1.88 +
    1.89 +  NS_ASSERTION(idx < MAX_GBK_LENGTH, "ARB");
    1.90 +  // play it safe- add if statement here ot protect ARB
    1.91 +  // probably not necessary
    1.92 +  if(idx < MAX_GBK_LENGTH)
    1.93 +    return gGBKToUnicodeTable[ idx ];
    1.94 +  else
    1.95 +    return UCS2_NO_MAPPING;
    1.96 +}

mercurial