1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/browser/components/translation/cld2/internal/compact_lang_det_hint_code.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,95 @@ 1.4 +// Copyright 2013 Google Inc. All Rights Reserved. 1.5 +// 1.6 +// Licensed under the Apache License, Version 2.0 (the "License"); 1.7 +// you may not use this file except in compliance with the License. 1.8 +// You may obtain a copy of the License at 1.9 +// 1.10 +// http://www.apache.org/licenses/LICENSE-2.0 1.11 +// 1.12 +// Unless required by applicable law or agreed to in writing, software 1.13 +// distributed under the License is distributed on an "AS IS" BASIS, 1.14 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1.15 +// See the License for the specific language governing permissions and 1.16 +// limitations under the License. 1.17 + 1.18 +// 1.19 +// Author: dsites@google.com (Dick Sites) 1.20 +// 1.21 + 1.22 +#ifndef I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__ 1.23 +#define I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__ 1.24 + 1.25 + 1.26 +#include <string> 1.27 +#include "integral_types.h" 1.28 +#include "lang_script.h" 1.29 +#include "../public/encodings.h" 1.30 + 1.31 +namespace CLD2 { 1.32 + 1.33 +// Packed <Language, weight>, weight in [-32..31] (powers of 2**1.6 ~=3.03) 1.34 +// Full language in bottom 10 bits, weight in top 6 bits 1.35 +typedef int16 OneCLDLangPrior; 1.36 + 1.37 +const int kMaxOneCLDLangPrior = 14; 1.38 +typedef struct { 1.39 + int32 n; 1.40 + OneCLDLangPrior prior[kMaxOneCLDLangPrior]; 1.41 +} CLDLangPriors; 1.42 + 1.43 +// Reading exposed here; setting hidden in .cc 1.44 +inline int GetCLDPriorWeight(OneCLDLangPrior olp) { 1.45 + return olp >> 10; 1.46 +} 1.47 +inline Language GetCLDPriorLang(OneCLDLangPrior olp) { 1.48 + return static_cast<Language>(olp & 0x3ff); 1.49 +} 1.50 + 1.51 +inline int32 GetCLDLangPriorCount(CLDLangPriors* lps) { 1.52 + return lps->n; 1.53 +} 1.54 + 1.55 +inline void InitCLDLangPriors(CLDLangPriors* lps) { 1.56 + lps->n = 0; 1.57 +} 1.58 + 1.59 +// Trim language priors to no more than max_entries, keeping largest abs weights 1.60 +void TrimCLDLangPriors(int max_entries, CLDLangPriors* lps); 1.61 + 1.62 +// Trim language tag string to canonical form for each language 1.63 +// Input is from GetLangTagsFromHtml(), already lowercased 1.64 +std::string TrimCLDLangTagsHint(const std::string& langtags); 1.65 + 1.66 +// Add hints to vector of langpriors 1.67 +// Input is from GetLangTagsFromHtml(), already lowercased 1.68 +void SetCLDLangTagsHint(const std::string& langtags, CLDLangPriors* langpriors); 1.69 + 1.70 +// Add hints to vector of langpriors 1.71 +// Input is from HTTP content-language 1.72 +void SetCLDContentLangHint(const char* contentlang, CLDLangPriors* langpriors); 1.73 + 1.74 +// Add hints to vector of langpriors 1.75 +// Input is from GetTLD(), already lowercased 1.76 +void SetCLDTLDHint(const char* tld, CLDLangPriors* langpriors); 1.77 + 1.78 +// Add hints to vector of langpriors 1.79 +// Input is from DetectEncoding() 1.80 +void SetCLDEncodingHint(Encoding enc, CLDLangPriors* langpriors); 1.81 + 1.82 +// Add hints to vector of langpriors 1.83 +// Input is from random source 1.84 +void SetCLDLanguageHint(Language lang, CLDLangPriors* langpriors); 1.85 + 1.86 +// Make printable string of priors 1.87 +std::string DumpCLDLangPriors(const CLDLangPriors* langpriors); 1.88 + 1.89 + 1.90 +// Get language tag hints from HTML body 1.91 +// Normalize: remove spaces and make lowercase comma list 1.92 +std::string GetLangTagsFromHtml(const char* utf8_body, int32 utf8_body_len, 1.93 + int32 max_scan_bytes); 1.94 + 1.95 +} // End namespace CLD2 1.96 + 1.97 +#endif // I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__ 1.98 +