Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | // Copyright 2013 Google Inc. All Rights Reserved. |
michael@0 | 2 | // |
michael@0 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
michael@0 | 4 | // you may not use this file except in compliance with the License. |
michael@0 | 5 | // You may obtain a copy of the License at |
michael@0 | 6 | // |
michael@0 | 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
michael@0 | 8 | // |
michael@0 | 9 | // Unless required by applicable law or agreed to in writing, software |
michael@0 | 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
michael@0 | 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
michael@0 | 12 | // See the License for the specific language governing permissions and |
michael@0 | 13 | // limitations under the License. |
michael@0 | 14 | |
michael@0 | 15 | // |
michael@0 | 16 | // Author: dsites@google.com (Dick Sites) |
michael@0 | 17 | // |
michael@0 | 18 | |
michael@0 | 19 | #ifndef I18N_ENCODINGS_CLD2_INTERNAL_TOTE_H_ |
michael@0 | 20 | #define I18N_ENCODINGS_CLD2_INTERNAL_TOTE_H_ |
michael@0 | 21 | |
michael@0 | 22 | #include <stdio.h> |
michael@0 | 23 | #include "integral_types.h" // for uint8 etc |
michael@0 | 24 | |
michael@0 | 25 | namespace CLD2 { |
michael@0 | 26 | |
michael@0 | 27 | |
michael@0 | 28 | // Take a set of <key, score> pairs and tote them up. |
michael@0 | 29 | // Key is an 8-bit per-script language |
michael@0 | 30 | // After explicitly sorting, retrieve top key, score pairs |
michael@0 | 31 | // Normal use is key=per-script language |
michael@0 | 32 | // The main data structure is an array of 256 uint16 counts. We normally |
michael@0 | 33 | // expect this to be initialized, added-to about 60 times, then the top three |
michael@0 | 34 | // items found. The reduce the initial and final time, we also keep a bit vector |
michael@0 | 35 | // of unused (and uninitialized) parts, each of 64 bits covering four keys. |
michael@0 | 36 | class Tote { |
michael@0 | 37 | public: |
michael@0 | 38 | Tote(); |
michael@0 | 39 | ~Tote(); |
michael@0 | 40 | void Reinit(); |
michael@0 | 41 | void AddScoreCount(); |
michael@0 | 42 | void Add(uint8 ikey, int idelta); |
michael@0 | 43 | void AddBytes(int ibytes) {byte_count_ += ibytes;} |
michael@0 | 44 | void CurrentTopThreeKeys(int* key3) const; |
michael@0 | 45 | int GetScoreCount() const {return score_count_;} |
michael@0 | 46 | int GetByteCount() const {return byte_count_;} |
michael@0 | 47 | int GetScore(int i) const {return score_[i];} |
michael@0 | 48 | void SetScoreCount(uint16 v) {score_count_ = v;} |
michael@0 | 49 | void SetScore(int i, int v) {score_[i] = v;} |
michael@0 | 50 | |
michael@0 | 51 | private: |
michael@0 | 52 | uint64 in_use_mask_; // 64 bits, one for each group of 4 scores. |
michael@0 | 53 | // 0 = not initialized,not used |
michael@0 | 54 | int byte_count_; // Bytes of text scored |
michael@0 | 55 | int score_count_; // Number of quadgrams/etc. scored |
michael@0 | 56 | union { |
michael@0 | 57 | uint64 gscore_[64]; // For alignment and clearing quickly |
michael@0 | 58 | uint16 score_[256]; // Probability score sum |
michael@0 | 59 | }; |
michael@0 | 60 | |
michael@0 | 61 | }; |
michael@0 | 62 | |
michael@0 | 63 | |
michael@0 | 64 | // Take a set of <key, score, reliability> triples and tote them up. |
michael@0 | 65 | // Key is a 16-bit full language |
michael@0 | 66 | // After explicitly sorting, retrieve top key, score, reliability triples |
michael@0 | 67 | class DocTote { |
michael@0 | 68 | public: |
michael@0 | 69 | DocTote(); |
michael@0 | 70 | ~DocTote(); |
michael@0 | 71 | void Reinit(); |
michael@0 | 72 | void Add(uint16 ikey, int ibytes, int score, int ireliability); |
michael@0 | 73 | int Find(uint16 ikey); |
michael@0 | 74 | void AddClosePair(int subscr, int val) {closepair_[subscr] += val;} |
michael@0 | 75 | int CurrentTopKey(); |
michael@0 | 76 | Tote* RunningScore() {return &runningscore_;} |
michael@0 | 77 | void Sort(int n); |
michael@0 | 78 | void Dump(FILE* f); |
michael@0 | 79 | |
michael@0 | 80 | int GetIncrCount() const {return incr_count_;} |
michael@0 | 81 | int GetClosePair(int subscr) const {return closepair_[subscr];} |
michael@0 | 82 | int MaxSize() const {return kMaxSize_;} |
michael@0 | 83 | uint16 Key(int i) const {return key_[i];} |
michael@0 | 84 | int Value(int i) const {return value_[i];} // byte count |
michael@0 | 85 | int Score(int i) const {return score_[i];} // sum lg prob |
michael@0 | 86 | int Reliability(int i) const {return reliability_[i];} |
michael@0 | 87 | void SetKey(int i, int v) {key_[i] = v;} |
michael@0 | 88 | void SetValue(int i, int v) {value_[i] = v;} |
michael@0 | 89 | void SetScore(int i, int v) {score_[i] = v;} |
michael@0 | 90 | void SetReliability(int i, int v) {reliability_[i] = v;} |
michael@0 | 91 | |
michael@0 | 92 | static const uint16 kUnusedKey = 0xFFFF; |
michael@0 | 93 | |
michael@0 | 94 | private: |
michael@0 | 95 | static const int kMaxSize_ = 24; |
michael@0 | 96 | static const int kMaxClosePairSize_ = 8; |
michael@0 | 97 | |
michael@0 | 98 | int incr_count_; // Number of Add calls |
michael@0 | 99 | int sorted_; // Contents have been sorted, cannot Add |
michael@0 | 100 | Tote runningscore_; // Top lang scores across entire doc, for |
michael@0 | 101 | // helping resolve close pairs |
michael@0 | 102 | // Align at multiple of 8 bytes |
michael@0 | 103 | int closepair_[kMaxClosePairSize_]; |
michael@0 | 104 | uint16 key_[kMaxSize_]; // Lang unassigned = 0xFFFF, valid = 1..1023 |
michael@0 | 105 | int value_[kMaxSize_]; // Bytecount this lang |
michael@0 | 106 | int score_[kMaxSize_]; // Probability score sum |
michael@0 | 107 | int reliability_[kMaxSize_]; // Percentage 0..100 |
michael@0 | 108 | }; |
michael@0 | 109 | |
michael@0 | 110 | } // End namespace CLD2 |
michael@0 | 111 | |
michael@0 | 112 | #endif // I18N_ENCODINGS_CLD2_INTERNAL_TOTE_H_ |