browser/components/translation/cld2/internal/tote.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/browser/components/translation/cld2/internal/tote.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,112 @@
     1.4 +// Copyright 2013 Google Inc. All Rights Reserved.
     1.5 +//
     1.6 +// Licensed under the Apache License, Version 2.0 (the "License");
     1.7 +// you may not use this file except in compliance with the License.
     1.8 +// You may obtain a copy of the License at
     1.9 +//
    1.10 +//     http://www.apache.org/licenses/LICENSE-2.0
    1.11 +//
    1.12 +// Unless required by applicable law or agreed to in writing, software
    1.13 +// distributed under the License is distributed on an "AS IS" BASIS,
    1.14 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    1.15 +// See the License for the specific language governing permissions and
    1.16 +// limitations under the License.
    1.17 +
    1.18 +//
    1.19 +// Author: dsites@google.com (Dick Sites)
    1.20 +//
    1.21 +
    1.22 +#ifndef I18N_ENCODINGS_CLD2_INTERNAL_TOTE_H_
    1.23 +#define I18N_ENCODINGS_CLD2_INTERNAL_TOTE_H_
    1.24 +
    1.25 +#include <stdio.h>
    1.26 +#include "integral_types.h"        // for uint8 etc
    1.27 +
    1.28 +namespace CLD2 {
    1.29 +
    1.30 +
    1.31 +// Take a set of <key, score> pairs and tote them up.
    1.32 +// Key is an 8-bit per-script language
    1.33 +// After explicitly sorting, retrieve top key, score pairs
    1.34 +// Normal use is key=per-script language
    1.35 +// The main data structure is an array of 256 uint16 counts. We normally
    1.36 +// expect this to be initialized, added-to about 60 times, then the top three
    1.37 +// items found. To reduce the initial and final time, we also keep a bit vector
    1.38 +// of unused (and uninitialized) parts, with each of its 64 bits covering four keys.
    1.39 +class Tote {  // Per-key score accumulator; non-inline members are defined outside this header
    1.40 + public:
    1.41 +  Tote();
    1.42 +  ~Tote();
    1.43 +  void Reinit();
    1.44 +  void AddScoreCount();
    1.45 +  void Add(uint8 ikey, int idelta);
    1.46 +  void AddBytes(int ibytes) {byte_count_ += ibytes;}  // Accumulates into byte_count_
    1.47 +  void CurrentTopThreeKeys(int* key3) const;  // NOTE(review): presumably key3 needs room for 3 ints -- confirm
    1.48 +  int GetScoreCount() const {return score_count_;}
    1.49 +  int GetByteCount() const {return byte_count_;}
    1.50 +  int GetScore(int i) const {return score_[i];}  // i is not range-checked against score_[256]
    1.51 +  void SetScoreCount(uint16 v) {score_count_ = v;}  // NOTE: uint16 parameter, but the member is an int
    1.52 +  void SetScore(int i, int v) {score_[i] = v;}  // i is not range-checked; v is narrowed to uint16
    1.53 +
    1.54 + private:
    1.55 +  uint64 in_use_mask_;      // 64 bits, one for each group of 4 scores.
    1.56 +                            //    0 = not initialized, not used
    1.57 +  int byte_count_;          // Bytes of text scored
    1.58 +  int score_count_;         // Number of quadgrams/etc. scored
    1.59 +  union {                   // Anonymous union: both arrays share the same storage
    1.60 +    uint64 gscore_[64];     // For alignment and clearing quickly
    1.61 +    uint16 score_[256];     // Probability score sum
    1.62 +  };
    1.63 +
    1.64 +};
    1.65 +
    1.66 +
    1.67 +// Take a set of <key, score, reliability> triples and tote them up.
    1.68 +// Key is a 16-bit full language
    1.69 +// After explicitly sorting, retrieve top key, score, reliability triples
    1.70 +class DocTote {  // Fixed-capacity document-level tote; non-inline members are defined outside this header
    1.71 + public:
    1.72 +  DocTote();
    1.73 +  ~DocTote();
    1.74 +  void Reinit();
    1.75 +  void Add(uint16 ikey, int ibytes, int score, int ireliability);
    1.76 +  int Find(uint16 ikey);
    1.77 +  void AddClosePair(int subscr, int val) {closepair_[subscr] += val;}  // subscr is not range-checked
    1.78 +  int CurrentTopKey();
    1.79 +  Tote* RunningScore() {return &runningscore_;}  // Address of the embedded runningscore_ member
    1.80 +  void Sort(int n);
    1.81 +  void Dump(FILE* f);
    1.82 +
    1.83 +  int GetIncrCount() const {return incr_count_;}
    1.84 +  int GetClosePair(int subscr) const {return closepair_[subscr];}  // subscr is not range-checked
    1.85 +  int MaxSize() const {return kMaxSize_;}  // Length of the key_/value_/score_/reliability_ arrays
    1.86 +  uint16 Key(int i) const {return key_[i];}  // The indexed accessors below do not range-check i
    1.87 +  int Value(int i) const {return value_[i];}      // byte count
    1.88 +  int Score(int i) const {return score_[i];}      // sum lg prob
    1.89 +  int Reliability(int i) const {return reliability_[i];}  // percentage 0..100
    1.90 +  void SetKey(int i, int v) {key_[i] = v;}  // v is narrowed to uint16
    1.91 +  void SetValue(int i, int v) {value_[i] = v;}
    1.92 +  void SetScore(int i, int v) {score_[i] = v;}
    1.93 +  void SetReliability(int i, int v) {reliability_[i] = v;}
    1.94 +
    1.95 +  static const uint16 kUnusedKey = 0xFFFF;  // Value of key_ for an unassigned language
    1.96 +
    1.97 + private:
    1.98 +  static const int kMaxSize_ = 24;  // Length of key_, value_, score_ and reliability_
    1.99 +  static const int kMaxClosePairSize_ = 8;  // Length of closepair_
   1.100 +
   1.101 +  int incr_count_;         // Number of Add calls
   1.102 +  int sorted_;             // Contents have been sorted, cannot Add
   1.103 +  Tote runningscore_;      // Top lang scores across entire doc, for
   1.104 +                           // helping resolve close pairs
   1.105 +  // Align at multiple of 8 bytes
   1.106 +  int closepair_[kMaxClosePairSize_];
   1.107 +  uint16 key_[kMaxSize_];   // Lang unassigned = 0xFFFF, valid = 1..1023
   1.108 +  int value_[kMaxSize_];    // Bytecount this lang
   1.109 +  int score_[kMaxSize_];    // Probability score sum
   1.110 +  int reliability_[kMaxSize_];  // Percentage 0..100
   1.111 +};
   1.112 +
   1.113 +}       // End namespace CLD2
   1.114 +
   1.115 +#endif  // I18N_ENCODINGS_CLD2_INTERNAL_TOTE_H_

mercurial