Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 // Copyright 2013 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 //
16 // Author: dsites@google.com (Dick Sites)
17 //
19 #ifndef I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__
20 #define I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__
23 #include <string>
24 #include "integral_types.h"
25 #include "lang_script.h"
26 #include "../public/encodings.h"
28 namespace CLD2 {
30 // One packed <Language, weight> pair. Weight is in [-32..31] (powers of 2**1.6 ~= 3.03).
31 // The full Language occupies the bottom 10 bits and the weight the top 6 bits.
32 typedef int16 OneCLDLangPrior;
34 const int kMaxOneCLDLangPrior = 14;
35 typedef struct {
36 int32 n;
37 OneCLDLangPrior prior[kMaxOneCLDLangPrior];
38 } CLDLangPriors;
40 // Reading exposed here; setting hidden in .cc
41 inline int GetCLDPriorWeight(OneCLDLangPrior olp) {
42 return olp >> 10;
43 }
44 inline Language GetCLDPriorLang(OneCLDLangPrior olp) {
45 return static_cast<Language>(olp & 0x3ff);
46 }
48 inline int32 GetCLDLangPriorCount(CLDLangPriors* lps) {
49 return lps->n;
50 }
52 inline void InitCLDLangPriors(CLDLangPriors* lps) {
53 lps->n = 0;
54 }
56 // Trims the priors in *lps to at most max_entries entries, keeping those with the largest absolute weights.
57 void TrimCLDLangPriors(int max_entries, CLDLangPriors* lps);
59 // Returns langtags with each language tag trimmed to its canonical form.
60 // langtags is expected to come from GetLangTagsFromHtml(), so it is already lowercased.
61 std::string TrimCLDLangTagsHint(const std::string& langtags);
63 // Adds hints derived from the language tags in langtags to *langpriors.
64 // langtags is expected to come from GetLangTagsFromHtml(), so it is already lowercased.
65 void SetCLDLangTagsHint(const std::string& langtags, CLDLangPriors* langpriors);
67 // Adds hints derived from contentlang to *langpriors.
68 // contentlang is the value of the HTTP Content-Language header.
69 void SetCLDContentLangHint(const char* contentlang, CLDLangPriors* langpriors);
71 // Adds hints derived from the top-level domain tld to *langpriors.
72 // tld is expected to come from GetTLD(), so it is already lowercased.
73 void SetCLDTLDHint(const char* tld, CLDLangPriors* langpriors);
75 // Adds hints derived from the encoding enc to *langpriors.
76 // enc is expected to be the result of DetectEncoding().
77 void SetCLDEncodingHint(Encoding enc, CLDLangPriors* langpriors);
79 // Adds a hint for the language lang to *langpriors.
80 // lang may come from any source available to the caller.
81 void SetCLDLanguageHint(Language lang, CLDLangPriors* langpriors);
83 // Returns a printable string describing the priors in *langpriors.
84 std::string DumpCLDLangPriors(const CLDLangPriors* langpriors);
87 // Gets language tag hints from the HTML body passed in utf8_body.
88 // The result is normalized: spaces removed, lowercased, formatted as a comma-separated list.
// NOTE(review): utf8_body_len is presumably the body length in bytes and
// max_scan_bytes a cap on how much of it is scanned -- confirm against the .cc.
89 std::string GetLangTagsFromHtml(const char* utf8_body, int32 utf8_body_len,
90 int32 max_scan_bytes);
92 } // End namespace CLD2
94 #endif // I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__