Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | // Copyright 2013 Google Inc. All Rights Reserved. |
michael@0 | 2 | // |
michael@0 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
michael@0 | 4 | // you may not use this file except in compliance with the License. |
michael@0 | 5 | // You may obtain a copy of the License at |
michael@0 | 6 | // |
michael@0 | 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
michael@0 | 8 | // |
michael@0 | 9 | // Unless required by applicable law or agreed to in writing, software |
michael@0 | 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
michael@0 | 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
michael@0 | 12 | // See the License for the specific language governing permissions and |
michael@0 | 13 | // limitations under the License. |
michael@0 | 14 | |
michael@0 | 15 | // |
michael@0 | 16 | // Author: dsites@google.com (Dick Sites) |
michael@0 | 17 | // |
michael@0 | 18 | |
michael@0 | 19 | #ifndef I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__ |
michael@0 | 20 | #define I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__ |
michael@0 | 21 | |
michael@0 | 22 | |
michael@0 | 23 | #include <string> |
michael@0 | 24 | #include "integral_types.h" |
michael@0 | 25 | #include "lang_script.h" |
michael@0 | 26 | #include "../public/encodings.h" |
michael@0 | 27 | |
michael@0 | 28 | namespace CLD2 { |
michael@0 | 29 | |
michael@0 | 30 | // Packed <Language, weight>, weight in [-32..31] (powers of 2**1.6 ~=3.03) |
michael@0 | 31 | // Full language in bottom 10 bits, weight in top 6 bits |
michael@0 | 32 | typedef int16 OneCLDLangPrior; |
michael@0 | 33 | |
michael@0 | 34 | const int kMaxOneCLDLangPrior = 14; |
michael@0 | 35 | typedef struct { |
michael@0 | 36 | int32 n; |
michael@0 | 37 | OneCLDLangPrior prior[kMaxOneCLDLangPrior]; |
michael@0 | 38 | } CLDLangPriors; |
michael@0 | 39 | |
michael@0 | 40 | // Reading exposed here; setting hidden in .cc |
michael@0 | 41 | inline int GetCLDPriorWeight(OneCLDLangPrior olp) { |
michael@0 | 42 | return olp >> 10; |
michael@0 | 43 | } |
michael@0 | 44 | inline Language GetCLDPriorLang(OneCLDLangPrior olp) { |
michael@0 | 45 | return static_cast<Language>(olp & 0x3ff); |
michael@0 | 46 | } |
michael@0 | 47 | |
michael@0 | 48 | inline int32 GetCLDLangPriorCount(CLDLangPriors* lps) { |
michael@0 | 49 | return lps->n; |
michael@0 | 50 | } |
michael@0 | 51 | |
michael@0 | 52 | inline void InitCLDLangPriors(CLDLangPriors* lps) { |
michael@0 | 53 | lps->n = 0; |
michael@0 | 54 | } |
michael@0 | 55 | |
michael@0 | 56 | // Trim language priors to no more than max_entries, keeping largest abs weights |
michael@0 | 57 | void TrimCLDLangPriors(int max_entries, CLDLangPriors* lps); |
michael@0 | 58 | |
// Returns the language tag string with each language trimmed to its
// canonical form.
// langtags is the output of GetLangTagsFromHtml(), which is already lowercase
std::string TrimCLDLangTagsHint(const std::string& langtags);
michael@0 | 62 | |
michael@0 | 63 | // Add hints to vector of langpriors |
michael@0 | 64 | // Input is from GetLangTagsFromHtml(), already lowercased |
michael@0 | 65 | void SetCLDLangTagsHint(const std::string& langtags, CLDLangPriors* langpriors); |
michael@0 | 66 | |
michael@0 | 67 | // Add hints to vector of langpriors |
michael@0 | 68 | // Input is from HTTP content-language |
michael@0 | 69 | void SetCLDContentLangHint(const char* contentlang, CLDLangPriors* langpriors); |
michael@0 | 70 | |
michael@0 | 71 | // Add hints to vector of langpriors |
michael@0 | 72 | // Input is from GetTLD(), already lowercased |
michael@0 | 73 | void SetCLDTLDHint(const char* tld, CLDLangPriors* langpriors); |
michael@0 | 74 | |
michael@0 | 75 | // Add hints to vector of langpriors |
michael@0 | 76 | // Input is from DetectEncoding() |
michael@0 | 77 | void SetCLDEncodingHint(Encoding enc, CLDLangPriors* langpriors); |
michael@0 | 78 | |
michael@0 | 79 | // Add hints to vector of langpriors |
michael@0 | 80 | // Input is from random source |
michael@0 | 81 | void SetCLDLanguageHint(Language lang, CLDLangPriors* langpriors); |
michael@0 | 82 | |
michael@0 | 83 | // Make printable string of priors |
michael@0 | 84 | std::string DumpCLDLangPriors(const CLDLangPriors* langpriors); |
michael@0 | 85 | |
michael@0 | 86 | |
michael@0 | 87 | // Get language tag hints from HTML body |
michael@0 | 88 | // Normalize: remove spaces and make lowercase comma list |
michael@0 | 89 | std::string GetLangTagsFromHtml(const char* utf8_body, int32 utf8_body_len, |
michael@0 | 90 | int32 max_scan_bytes); |
michael@0 | 91 | |
michael@0 | 92 | } // End namespace CLD2 |
michael@0 | 93 | |
michael@0 | 94 | #endif // I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__ |
michael@0 | 95 |