browser/components/translation/cld2/internal/fixunicodevalue.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/browser/components/translation/cld2/internal/fixunicodevalue.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,68 @@
     1.4 +// Copyright 2013 Google Inc. All Rights Reserved.
     1.5 +//
     1.6 +// Licensed under the Apache License, Version 2.0 (the "License");
     1.7 +// you may not use this file except in compliance with the License.
     1.8 +// You may obtain a copy of the License at
     1.9 +//
    1.10 +//     http://www.apache.org/licenses/LICENSE-2.0
    1.11 +//
    1.12 +// Unless required by applicable law or agreed to in writing, software
    1.13 +// distributed under the License is distributed on an "AS IS" BASIS,
    1.14 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    1.15 +// See the License for the specific language governing permissions and
    1.16 +// limitations under the License.
    1.17 +
    1.18 +//
    1.19 +// Routine that maps a Unicode code point to an interchange-valid one
    1.20 +//
    1.21 +// Table that maps MS CP1252 bytes 00-FF to their corresponding Unicode
    1.22 +// code points. C0 and C1 control codes that are not interchange-valid
    1.23 +// are mapped to spaces.
    1.24 +
    1.25 +
    1.26 +#ifndef I18N_ENCODINGS_CLD2_INTERNAL_FIXUNICODEVALUE_H__
    1.27 +#define I18N_ENCODINGS_CLD2_INTERNAL_FIXUNICODEVALUE_H__
    1.28 +
    1.29 +#include "integral_types.h"        // for char32
    1.30 +#include "port.h"
    1.31 +
    1.32 +namespace CLD2 {
    1.33 +
    1.34 +// Lookup table indexed by byte value 0x00-0xFF; each entry is the char32 code point to use for that byte.
    1.35 +// C0 control codes other than HT (0x09), LF (0x0a), FF (0x0c) and CR (0x0d) map to space (0x20), as does DEL (0x7f) [29 instances]
    1.36 +// Bytes 0x80-0x9f (the C1 range) map to CP1252 code points [27 instances] or to space [5 instances: 0x81, 0x8d, 0x8f, 0x90, 0x9d]; every other byte maps to itself
    1.37 +static const char32 kMapFullMicrosoft1252OrSpace[256] = {
    1.38 +  0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20, 0x20,0x09,0x0a,0x20, 0x0c,0x0d,0x20,0x20, // 00 (only HT, LF, FF, CR kept)
    1.39 +  0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20, // 10 (all -> space)
    1.40 +  0x20,0x21,0x22,0x23, 0x24,0x25,0x26,0x27, 0x28,0x29,0x2a,0x2b, 0x2c,0x2d,0x2e,0x2f, // 20
    1.41 +  0x30,0x31,0x32,0x33, 0x34,0x35,0x36,0x37, 0x38,0x39,0x3a,0x3b, 0x3c,0x3d,0x3e,0x3f, // 30
    1.42 +
    1.43 +  0x40,0x41,0x42,0x43, 0x44,0x45,0x46,0x47, 0x48,0x49,0x4a,0x4b, 0x4c,0x4d,0x4e,0x4f, // 40
    1.44 +  0x50,0x51,0x52,0x53, 0x54,0x55,0x56,0x57, 0x58,0x59,0x5a,0x5b, 0x5c,0x5d,0x5e,0x5f, // 50
    1.45 +  0x60,0x61,0x62,0x63, 0x64,0x65,0x66,0x67, 0x68,0x69,0x6a,0x6b, 0x6c,0x6d,0x6e,0x6f, // 60
    1.46 +  0x70,0x71,0x72,0x73, 0x74,0x75,0x76,0x77, 0x78,0x79,0x7a,0x7b, 0x7c,0x7d,0x7e,0x20, // 70 (7f DEL -> space)
    1.47 +
    1.48 +  0x20ac,0x20,0x201a,0x0192, 0x201e,0x2026,0x2020,0x2021,                             // 80-87 (81 -> space); eight entries per line from here to 9f
    1.49 +  0x02c6,0x2030,0x0160,0x2039, 0x0152,0x20,0x017d,0x20, // 88-8f (8d, 8f -> space)
    1.50 +  0x20,0x2018,0x2019,0x201c, 0x201d,0x2022,0x2013,0x2014, // 90-97 (90 -> space)
    1.51 +  0x02dc,0x2122,0x0161,0x203a, 0x0153,0x20,0x017e,0x0178, // 98-9f (9d -> space)
    1.52 +  0xa0,0xa1,0xa2,0xa3, 0xa4,0xa5,0xa6,0xa7, 0xa8,0xa9,0xaa,0xab, 0xac,0xad,0xae,0xaf, // A0 (identity from here to FF)
    1.53 +  0xb0,0xb1,0xb2,0xb3, 0xb4,0xb5,0xb6,0xb7, 0xb8,0xb9,0xba,0xbb, 0xbc,0xbd,0xbe,0xbf, // B0
    1.54 +
    1.55 +  0xc0,0xc1,0xc2,0xc3, 0xc4,0xc5,0xc6,0xc7, 0xc8,0xc9,0xca,0xcb, 0xcc,0xcd,0xce,0xcf, // C0
    1.56 +  0xd0,0xd1,0xd2,0xd3, 0xd4,0xd5,0xd6,0xd7, 0xd8,0xd9,0xda,0xdb, 0xdc,0xdd,0xde,0xdf, // D0
    1.57 +  0xe0,0xe1,0xe2,0xe3, 0xe4,0xe5,0xe6,0xe7, 0xe8,0xe9,0xea,0xeb, 0xec,0xed,0xee,0xef, // E0
    1.58 +  0xf0,0xf1,0xf2,0xf3, 0xf4,0xf5,0xf6,0xf7, 0xf8,0xf9,0xfa,0xfb, 0xfc,0xfd,0xfe,0xff, // F0
    1.59 +};
    1.60 +
    1.61 +// Maps code point uv to an interchange-valid one and returns it; only declared here, the definition lives outside this header.
    1.62 +//  00-FF: replaced by a space or an MS CP1252 code point (presumably via kMapFullMicrosoft1252OrSpace -- confirm in the definition)
    1.63 +//  D800-DFFF: surrogates, not interchange-valid (replacement value is not shown in this header)
    1.64 +//  FDD0-FDEF: non-characters, likewise replaced
    1.65 +//  xxFFFE-xxFFFF: non-characters (the last two code points of each plane), likewise replaced
    1.66 +char32 FixUnicodeValue(char32 uv);
    1.67 +
    1.68 +}       // End namespace CLD2
    1.69 +
    1.70 +#endif  // I18N_ENCODINGS_CLD2_INTERNAL_FIXUNICODEVALUE_H__
    1.71 +

mercurial