browser/components/translation/cld2/internal/compact_lang_det_hint_code.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

     1 // Copyright 2013 Google Inc. All Rights Reserved.
     2 //
     3 // Licensed under the Apache License, Version 2.0 (the "License");
     4 // you may not use this file except in compliance with the License.
     5 // You may obtain a copy of the License at
     6 //
     7 //     http://www.apache.org/licenses/LICENSE-2.0
     8 //
     9 // Unless required by applicable law or agreed to in writing, software
    10 // distributed under the License is distributed on an "AS IS" BASIS,
    11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12 // See the License for the specific language governing permissions and
    13 // limitations under the License.
    15 //
    16 // Author: dsites@google.com (Dick Sites)
    17 //
    19 #ifndef I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__
    20 #define I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__
    23 #include <string>
    24 #include "integral_types.h"
    25 #include "lang_script.h"
    26 #include "../public/encodings.h"
    28 namespace CLD2 {
    30 // Packed <Language, weight>, weight in [-32..31] (powers of 2**1.6 ~=3.03)
    31 // Full language in bottom 10 bits, weight in top 6 bits
       // Unpack with GetCLDPriorLang() (low 10 bits) and GetCLDPriorWeight()
       // (the bits above them).
    32 typedef int16 OneCLDLangPrior;
       // Number of OneCLDLangPrior slots in the prior[] array of CLDLangPriors.
    34 const int kMaxOneCLDLangPrior = 14;
    35 struct CLDLangPriors {
         // Count field: returned by GetCLDLangPriorCount(), zeroed by
         // InitCLDLangPriors(). Presumably the number of prior[] slots in use
         // -- the code that fills them is in the .cc, confirm there.
    36   int32 n;
         // Fixed-capacity storage for packed <Language, weight> entries.
    37   OneCLDLangPrior prior[kMaxOneCLDLangPrior];
    38 };
    40 // Read accessors are defined inline here; the code that sets priors is in the .cc
       //
       // Returns the weight part of a packed prior, i.e. the bits above the low
       // 10 language bits. The shift is applied to the signed value; right-shifting
       // a negative value is implementation-defined before C++20, and the usual
       // arithmetic shift hands a negative weight back as negative.
    41 inline int GetCLDPriorWeight(OneCLDLangPrior olp) {
         const int packed = olp;  // the same widening the shift performs implicitly
    42   return packed >> 10;
    43 }
    44 inline Language GetCLDPriorLang(OneCLDLangPrior olp) {
         // The language code is the low 10 bits of the packed prior; mask off
         // everything above them before converting to the enum type.
         const int language_bits = olp & 0x3ff;
    45   return static_cast<Language>(language_bits);
    46 }
    48 inline int32 GetCLDLangPriorCount(CLDLangPriors* lps) {
         // Plain read of the count field. lps is dereferenced without a null
         // check, so callers must pass a valid pointer.
         const CLDLangPriors& priors = *lps;
    49   return priors.n;
    50 }
    52 inline void InitCLDLangPriors(CLDLangPriors* lps) {
         // Only the count is reset; the prior[] slots themselves are not touched.
         // lps is dereferenced without a null check.
         CLDLangPriors& priors = *lps;
    53   priors.n = 0;
    54 }
    56 // Trim language priors to no more than max_entries, keeping largest abs weights
       //   max_entries: upper bound on the number of priors left in *lps
       //   lps:         priors to trim; passed by non-const pointer, so presumably
       //                edited in place (the definition is not in this header)
    57 void TrimCLDLangPriors(int max_entries, CLDLangPriors* lps);
    59 // Trim language tag string to canonical form for each language
    60 // Input is from GetLangTagsFromHtml(), already lowercased
       //   langtags: tag list to canonicalize; taken by const reference
       // Returns the trimmed list as a std::string by value.
    61 std::string TrimCLDLangTagsHint(const std::string& langtags);
    63 // Add hints to vector of langpriors
    64 // Input is from GetLangTagsFromHtml(), already lowercased
       //   langtags:   lowercased tag list the hints are taken from
       //   langpriors: set of priors the hints are added to
    65 void SetCLDLangTagsHint(const std::string& langtags, CLDLangPriors* langpriors);
    67 // Add hints to vector of langpriors
    68 // Input is from HTTP content-language
       //   contentlang: content-language value as a C string
       //                NOTE(review): null handling is not visible here -- confirm in .cc
       //   langpriors:  set of priors the hints are added to
    69 void SetCLDContentLangHint(const char* contentlang, CLDLangPriors* langpriors);
    71 // Add hints to vector of langpriors
    72 // Input is from GetTLD(), already lowercased
       //   tld:        lowercased top-level domain as a C string
       //               NOTE(review): null handling is not visible here -- confirm in .cc
       //   langpriors: set of priors the hints are added to
    73 void SetCLDTLDHint(const char* tld, CLDLangPriors* langpriors);
    75 // Add hints to vector of langpriors
    76 // Input is from DetectEncoding()
       //   enc:        Encoding value the hint is derived from
       //   langpriors: set of priors the hints are added to
    77 void SetCLDEncodingHint(Encoding enc, CLDLangPriors* langpriors);
    79 // Add hints to vector of langpriors
    80 // Input is from random source
       //   lang:       Language value supplied directly by the caller
       //   langpriors: set of priors the hints are added to
    81 void SetCLDLanguageHint(Language lang, CLDLangPriors* langpriors);
    83 // Make printable string of priors
       //   langpriors: priors to render; pointer-to-const, so the declaration
       //               promises not to modify them
       // Returns the text as a std::string by value; the layout of the text is
       // not specified in this header.
    84 std::string DumpCLDLangPriors(const CLDLangPriors* langpriors);
    87 // Get language tag hints from HTML body
    88 // Normalize: remove spaces and make lowercase comma list
       //   utf8_body:      start of the HTML text (UTF-8, going by the name)
       //   utf8_body_len:  length of utf8_body -- presumably in bytes, TODO confirm
       //   max_scan_bytes: presumably caps how much of the body is examined;
       //                   verify against the definition in the .cc
       // Returns the normalized tag list as a std::string by value.
    89 std::string GetLangTagsFromHtml(const char* utf8_body, int32 utf8_body_len,
    90                            int32 max_scan_bytes);
    92 }       // End namespace CLD2
    94 #endif  // I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__

mercurial