browser/components/translation/cld2/internal/compact_lang_det_hint_code.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 // Copyright 2013 Google Inc. All Rights Reserved.
michael@0 2 //
michael@0 3 // Licensed under the Apache License, Version 2.0 (the "License");
michael@0 4 // you may not use this file except in compliance with the License.
michael@0 5 // You may obtain a copy of the License at
michael@0 6 //
michael@0 7 // http://www.apache.org/licenses/LICENSE-2.0
michael@0 8 //
michael@0 9 // Unless required by applicable law or agreed to in writing, software
michael@0 10 // distributed under the License is distributed on an "AS IS" BASIS,
michael@0 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
michael@0 12 // See the License for the specific language governing permissions and
michael@0 13 // limitations under the License.
michael@0 14
michael@0 15 //
michael@0 16 // Author: dsites@google.com (Dick Sites)
michael@0 17 //
michael@0 18
michael@0 19 #ifndef I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__
michael@0 20 #define I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__
michael@0 21
michael@0 22
michael@0 23 #include <string>
michael@0 24 #include "integral_types.h"
michael@0 25 #include "lang_script.h"
michael@0 26 #include "../public/encodings.h"
michael@0 27
michael@0 28 namespace CLD2 {
michael@0 29
michael@0 30 // Packed <Language, weight>, weight in [-32..31] (powers of 2**1.6 ~=3.03)
michael@0 31 // Full language in bottom 10 bits, weight in top 6 bits
// Layout as used by the accessors in this header:
//   language = olp & 0x3ff   (GetCLDPriorLang)
//   weight   = olp >> 10     (GetCLDPriorWeight)
// NOTE(review): int16 is declared outside this file (integral_types.h is
// included above); presumably a signed 16-bit integer, which the signed
// weight range requires -- confirm.
michael@0 32 typedef int16 OneCLDLangPrior;
michael@0 33
// Capacity of the fixed-size prior[] array inside CLDLangPriors.
michael@0 34 const int kMaxOneCLDLangPrior = 14;
// Fixed-capacity collection of packed language priors.
//   n      count field: reset to 0 by InitCLDLangPriors() and returned by
//          GetCLDLangPriorCount(). Presumably the number of valid entries
//          in prior[] -- the code that fills it lives in the .cc; confirm.
//   prior  storage for up to kMaxOneCLDLangPrior packed entries.
michael@0 35 typedef struct {
michael@0 36 int32 n;
michael@0 37 OneCLDLangPrior prior[kMaxOneCLDLangPrior];
michael@0 38 } CLDLangPriors;
michael@0 39
michael@0 40 // Reading exposed here; setting hidden in .cc
// Returns the weight field of a packed prior: everything above the low
// 10 (language) bits, obtained with a right shift by 10.
// NOTE(review): for negative weights this depends on >> of a negative signed
// value being an arithmetic (sign-propagating) shift. That is
// implementation-defined before C++20, although mainstream compilers do it.
// Assumes OneCLDLangPrior (int16) is signed -- confirm in integral_types.h.
michael@0 41 inline int GetCLDPriorWeight(OneCLDLangPrior olp) {
michael@0 42 return olp >> 10;
michael@0 43 }
// Returns the language field of a packed prior: the low 10 bits (mask 0x3ff)
// converted to Language with static_cast.
// No check is made that the masked value corresponds to a defined Language
// value; Language itself is declared outside this file.
michael@0 44 inline Language GetCLDPriorLang(OneCLDLangPrior olp) {
michael@0 45 return static_cast<Language>(olp & 0x3ff);
michael@0 46 }
michael@0 47
// Returns the count field (lps->n) of a prior collection.
// The parameter is pointer-to-const because the function only reads *lps.
// This lets code holding a `const CLDLangPriors*` (the type
// DumpCLDLangPriors() accepts) query the count without a const_cast.
// Callers that pass a non-const pointer are unaffected (implicit conversion).
// lps must be non-null: it is dereferenced without a check.
michael@0 48 inline int32 GetCLDLangPriorCount(const CLDLangPriors* lps) {
michael@0 49 return lps->n;
michael@0 50 }
michael@0 51
// Resets a prior collection by setting its count field n to 0.
// The contents of prior[] are not cleared or otherwise touched.
// lps must be non-null: it is dereferenced without a check.
michael@0 52 inline void InitCLDLangPriors(CLDLangPriors* lps) {
michael@0 53 lps->n = 0;
michael@0 54 }
michael@0 55
michael@0 56 // Trim language priors to no more than max_entries, keeping largest abs weights
// Declaration only; the definition is not in this header.
// Returns nothing and takes lps as a non-const pointer, so the trimming is
// presumably done in place on *lps -- confirm against the definition.
michael@0 57 void TrimCLDLangPriors(int max_entries, CLDLangPriors* lps);
michael@0 58
michael@0 59 // Trim language tag string to canonical form for each language
michael@0 60 // Input is from GetLangTagsFromHtml(), already lowercased
// Declaration only. langtags is taken by const reference and the result is
// returned by value as a new std::string.
michael@0 61 std::string TrimCLDLangTagsHint(const std::string& langtags);
michael@0 62
michael@0 63 // Add hints to vector of langpriors
michael@0 64 // Input is from GetLangTagsFromHtml(), already lowercased
// Declaration only. The "vector" here is the CLDLangPriors struct passed via
// langpriors (a fixed-size array plus a count), not a std::vector.
michael@0 65 void SetCLDLangTagsHint(const std::string& langtags, CLDLangPriors* langpriors);
michael@0 66
michael@0 67 // Add hints to vector of langpriors
michael@0 68 // Input is from HTTP content-language
// Declaration only. contentlang is passed without a length, so it is
// presumably a NUL-terminated C string -- confirm against the definition.
// Hints go into the CLDLangPriors struct passed via langpriors.
michael@0 69 void SetCLDContentLangHint(const char* contentlang, CLDLangPriors* langpriors);
michael@0 70
michael@0 71 // Add hints to vector of langpriors
michael@0 72 // Input is from GetTLD(), already lowercased
// Declaration only. tld is passed without a length, so it is presumably a
// NUL-terminated C string -- confirm against the definition.
// Hints go into the CLDLangPriors struct passed via langpriors.
michael@0 73 void SetCLDTLDHint(const char* tld, CLDLangPriors* langpriors);
michael@0 74
michael@0 75 // Add hints to vector of langpriors
michael@0 76 // Input is from DetectEncoding()
// Declaration only. Encoding is declared outside this file (presumably in
// ../public/encodings.h, included above -- confirm).
// Hints go into the CLDLangPriors struct passed via langpriors.
michael@0 77 void SetCLDEncodingHint(Encoding enc, CLDLangPriors* langpriors);
michael@0 78
michael@0 79 // Add hints to vector of langpriors
michael@0 80 // Input is from random source
// Declaration only. "Random source" presumably means any caller-supplied
// Language not covered by the other hint setters above -- confirm.
// Hints go into the CLDLangPriors struct passed via langpriors.
michael@0 81 void SetCLDLanguageHint(Language lang, CLDLangPriors* langpriors);
michael@0 82
michael@0 83 // Make printable string of priors
// Declaration only. langpriors is a pointer-to-const, so the collection is
// read-only to this function; the text is returned by value.
michael@0 84 std::string DumpCLDLangPriors(const CLDLangPriors* langpriors);
michael@0 85
michael@0 86
michael@0 87 // Get language tag hints from HTML body
michael@0 88 // Normalize: remove spaces and make lowercase comma list
// Declaration only. The result is returned by value.
//   utf8_body       pointer to the HTML text (UTF-8, per the parameter name)
//   utf8_body_len   length of utf8_body, presumably in bytes -- confirm
//   max_scan_bytes  presumably an upper bound on how much of the body is
//                   scanned -- confirm against the definition
michael@0 89 std::string GetLangTagsFromHtml(const char* utf8_body, int32 utf8_body_len,
michael@0 90 int32 max_scan_bytes);
michael@0 91
michael@0 92 } // End namespace CLD2
michael@0 93
michael@0 94 #endif // I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__
michael@0 95

mercurial