browser/components/translation/cld2/internal/compact_lang_det_hint_code.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/browser/components/translation/cld2/internal/compact_lang_det_hint_code.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,95 @@
     1.4 +// Copyright 2013 Google Inc. All Rights Reserved.
     1.5 +//
     1.6 +// Licensed under the Apache License, Version 2.0 (the "License");
     1.7 +// you may not use this file except in compliance with the License.
     1.8 +// You may obtain a copy of the License at
     1.9 +//
    1.10 +//     http://www.apache.org/licenses/LICENSE-2.0
    1.11 +//
    1.12 +// Unless required by applicable law or agreed to in writing, software
    1.13 +// distributed under the License is distributed on an "AS IS" BASIS,
    1.14 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    1.15 +// See the License for the specific language governing permissions and
    1.16 +// limitations under the License.
    1.17 +
    1.18 +//
    1.19 +// Author: dsites@google.com (Dick Sites)
    1.20 +//
    1.21 +
    1.22 +#ifndef I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__
    1.23 +#define I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__
    1.24 +
    1.25 +
    1.26 +#include <string>
    1.27 +#include "integral_types.h"
    1.28 +#include "lang_script.h"
    1.29 +#include "../public/encodings.h"
    1.30 +
    1.31 +namespace CLD2 {
    1.32 +
    1.33 +// Packed <Language, weight> pair; signed weight in [-32..31] (powers of 2**1.6 ~= 3.03)
    1.34 +// Language in the bottom 10 bits, weight in the top 6 bits of the 16-bit value
    1.35 +typedef int16 OneCLDLangPrior;
    1.36 +
    1.37 +const int kMaxOneCLDLangPrior = 14;  // capacity of CLDLangPriors::prior[]
    1.38 +struct CLDLangPriors {
    1.39 +  int32 n;  // count returned by GetCLDLangPriorCount(); zeroed by InitCLDLangPriors()
    1.40 +  OneCLDLangPrior prior[kMaxOneCLDLangPrior];  // packed <Language, weight> entries
    1.41 +};
    1.42 +
    1.43 +// Reading exposed here; setting hidden in .cc
    1.44 +inline int GetCLDPriorWeight(OneCLDLangPrior olp) {
    1.45 +  return static_cast<int>(olp) >> 10;  // signed weight sits above the 10 language bits
    1.46 +}
    1.47 +inline Language GetCLDPriorLang(OneCLDLangPrior olp) {
    1.48 +  return static_cast<Language>(olp & ((1 << 10) - 1));  // keep only the bottom 10 bits
    1.49 +}
    1.50 +
    1.51 +inline int32 GetCLDLangPriorCount(const CLDLangPriors* lps) {
    1.52 +  return lps->n;  // read-only: also accepts the const pointer DumpCLDLangPriors() receives
    1.53 +}
    1.54 +
    1.55 +inline void InitCLDLangPriors(CLDLangPriors* lps) {
    1.56 +  lps->n = 0;  // only the count is reset; prior[] entries are left as they were
    1.57 +}
    1.58 +
    1.59 +// Trim *lps to no more than max_entries priors, keeping those with the largest abs weights
    1.60 +void TrimCLDLangPriors(int max_entries, CLDLangPriors* lps);
    1.61 +
    1.62 +// Trim language tag string to canonical form for each language; returns the trimmed string
    1.63 +// Input is from GetLangTagsFromHtml(), already lowercased
    1.64 +std::string TrimCLDLangTagsHint(const std::string& langtags);
    1.65 +
    1.66 +// Add hints from language tags to *langpriors (a CLDLangPriors struct, not a std::vector)
    1.67 +// Input is from GetLangTagsFromHtml(), already lowercased
    1.68 +void SetCLDLangTagsHint(const std::string& langtags, CLDLangPriors* langpriors);
    1.69 +
    1.70 +// Add hints from the content language to *langpriors
    1.71 +// Input is from HTTP content-language, passed as a C string
    1.72 +void SetCLDContentLangHint(const char* contentlang, CLDLangPriors* langpriors);
    1.73 +
    1.74 +// Add hints from the top-level domain to *langpriors
    1.75 +// Input is from GetTLD(), already lowercased, passed as a C string
    1.76 +void SetCLDTLDHint(const char* tld, CLDLangPriors* langpriors);
    1.77 +
    1.78 +// Add hints from the document encoding to *langpriors
    1.79 +// Input is from DetectEncoding()
    1.80 +void SetCLDEncodingHint(Encoding enc, CLDLangPriors* langpriors);
    1.81 +
    1.82 +// Add a hint for an explicitly supplied language to *langpriors
    1.83 +// Input is from an arbitrary (unspecified) source
    1.84 +void SetCLDLanguageHint(Language lang, CLDLangPriors* langpriors);
    1.85 +
    1.86 +// Make printable string of the priors in *langpriors; takes a const pointer, returns the string
    1.87 +std::string DumpCLDLangPriors(const CLDLangPriors* langpriors);
    1.88 +
    1.89 +
    1.90 +// Get language tag hints from the HTML body in utf8_body (length given by utf8_body_len)
    1.91 +// Normalize: remove spaces and make lowercase comma list; max_scan_bytes presumably caps the scan (TODO confirm)
    1.92 +std::string GetLangTagsFromHtml(const char* utf8_body, int32 utf8_body_len,
    1.93 +                           int32 max_scan_bytes);
    1.94 +
    1.95 +}       // End namespace CLD2
    1.96 +
    1.97 +#endif  // I18N_ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_HINT_CODE_H__
    1.98 +

mercurial