browser/components/translation/cld2/internal/offsetmap.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 // Copyright 2013 Google Inc. All Rights Reserved.
michael@0 2 //
michael@0 3 // Licensed under the Apache License, Version 2.0 (the "License");
michael@0 4 // you may not use this file except in compliance with the License.
michael@0 5 // You may obtain a copy of the License at
michael@0 6 //
michael@0 7 // http://www.apache.org/licenses/LICENSE-2.0
michael@0 8 //
michael@0 9 // Unless required by applicable law or agreed to in writing, software
michael@0 10 // distributed under the License is distributed on an "AS IS" BASIS,
michael@0 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
michael@0 12 // See the License for the specific language governing permissions and
michael@0 13 // limitations under the License.
michael@0 14
michael@0 15 //
michael@0 16 // Author: dsites@google.com (Dick Sites)
michael@0 17 //
michael@0 18
michael@0 19 #ifndef UTIL_UTF8_OFFSETMAP_H_
michael@0 20 #define UTIL_UTF8_OFFSETMAP_H_
michael@0 21
michael@0 22 #include <string> // for string
michael@0 23 #include "integral_types.h" // for uint32
michael@0 24
michael@0 25 // ***************************** OffsetMap **************************
michael@0 26 //
michael@0 27 // An OffsetMap object is a container for a mapping from offsets in one text
michael@0 28 // buffer A' to offsets in another text buffer A. It is most useful when A' is
michael@0 29 // built from A via substitutions that occasionally do not preserve byte length.
michael@0 30 //
michael@0 31 // A series of operators are used to build the correspondence map, then
michael@0 32 // calls can be made to map an offset in A' to an offset in A, or vice versa.
michael@0 33 // The map starts with offset 0 in A corresponding to offset 0 in A'.
michael@0 34 // The mapping is then built sequentially, adding on byte ranges that are
michael@0 35 // identical in A and A', byte ranges that are inserted in A', and byte ranges
michael@0 36 // that are deleted from A. All bytes beyond those specified when building the
michael@0 37 // map are assumed to correspond, i.e. a Copy(infinity) is assumed at the
michael@0 38 // end of the map.
michael@0 39 //
michael@0 40 // The internal data structure records positions at which bytes are added or
michael@0 41 // deleted. Using the map is O(1) when increasing the A' or A offset
michael@0 42 // monotonically, and O(n) when accessing random offsets, where n is the
michael@0 43 // number of differences.
michael@0 44 //
michael@0 45
michael@0 46 namespace CLD2 {
michael@0 47
michael@0 48 class OffsetMap {
michael@0 49 public:
michael@0 50 // Constructor, destructor
michael@0 51 OffsetMap();
michael@0 52 ~OffsetMap();
michael@0 53
michael@0 54 // Clear the map
michael@0 55 void Clear();
michael@0 56
michael@0 57 // Add to mapping from A to A', specifying how many next bytes correspond
michael@0 58 // in A and A'
michael@0 59 void Copy(int bytes);
michael@0 60
michael@0 61 // Add to mapping from A to A', specifying how many next bytes are
michael@0 62 // inserted in A' while not advancing in A at all
michael@0 63 void Insert(int bytes);
michael@0 64
michael@0 65 // Add to mapping from A to A', specifying how many next bytes are
michael@0 66 // deleted from A while not advancing in A' at all
michael@0 67 void Delete(int bytes);
michael@0 68
michael@0 69 // Print map to file, for debugging
michael@0 70 void Printmap(const char* filename);
michael@0 71
michael@0 72 // [Finish building map,] Re-position to offset 0
michael@0 73 // This call is optional; MapForward and MapBack finish building the map
michael@0 74 // if necessary
michael@0 75 void Reset();
michael@0 76
michael@0 77 // Map an offset in A' to the corresponding offset in A
michael@0 78 int MapBack(int aprimeoffset);
michael@0 79
michael@0 80 // Map an offset in A to the corresponding offset in A'
michael@0 81 int MapForward(int aoffset);
michael@0 82
michael@0 83 // h = ComposeOffsetMap(g, f), where f is a map from A to A', g is
michael@0 84 // from A' to A'' and h is from A to A''.
michael@0 85 //
michael@0 86 // Note that g->MoveForward(f->MoveForward(aoffset)) always equals
michael@0 87 // to h->MoveForward(aoffset), while
michael@0 88 // f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals
michael@0 89 // to h->MoveBack(aprimeprimeoffset). This happens when deletion in
michael@0 90 // f and insertion in g are at the same place. For example,
michael@0 91 //
michael@0 92 // A 1 2 3 4
michael@0 93 // ^ | ^ ^
michael@0 94 // | | / | f
michael@0 95 // v vv v
michael@0 96 // A' 1' 2' 3'
michael@0 97 // ^ ^^ ^
michael@0 98 // | | \ | g
michael@0 99 // v | v v
michael@0 100 // A'' 1'' 2'' 3'' 4''
michael@0 101 //
michael@0 102 // results in:
michael@0 103 //
michael@0 104 // A 1 2 3 4
michael@0 105 // ^ ^\ ^ ^
michael@0 106 // | | \ | | h
michael@0 107 // v | vv v
michael@0 108 // A'' 1'' 2'' 3'' 4''
michael@0 109 //
michael@0 110 // 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in
michael@0 111 // the latter figure.
michael@0 112 static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h);
michael@0 113
michael@0 114 // For debugging only; writes to stderr
michael@0 115 void DumpWindow();
michael@0 116
michael@0 117 // For testing only -- force a mapping
michael@0 118 void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset);
michael@0 119
michael@0 120 private:
michael@0 121 enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP};
michael@0 122
michael@0 123 void Flush();
michael@0 124 void FlushAll();
michael@0 125 void MaybeFlushAll();
michael@0 126 void Emit(MapOp op, int len);
michael@0 127
michael@0 128 void SetLeft();
michael@0 129 void SetRight();
michael@0 130
michael@0 131 // Back up over previous range, 1..5 bytes
michael@0 132 // Return subscript at the beginning of that. Pins at 0
michael@0 133 int Backup(int sub);
michael@0 134
michael@0 135 // Parse next range, 1..5 bytes
michael@0 136 // Return subscript just off the end of that
michael@0 137 int ParseNext(int sub, MapOp* op, int* length);
michael@0 138
michael@0 139 // Parse previous range, 1..5 bytes
michael@0 140 // Return current subscript
michael@0 141 int ParsePrevious(int sub, MapOp* op, int* length);
michael@0 142
michael@0 143 void PrintPosition(const char* str);
michael@0 144
michael@0 145 bool MoveRight(); // Returns true if OK
michael@0 146 bool MoveLeft(); // Returns true if OK
michael@0 147 void DumpString();
michael@0 148
michael@0 149 // Copies insert operations from source to dest. Returns true if no
michael@0 150 // other operations are found.
michael@0 151 static bool CopyInserts(OffsetMap* source, OffsetMap* dest);
michael@0 152
michael@0 153 // Copies delete operations from source to dest. Returns true if no other
michael@0 154 // operations are found.
michael@0 155 static bool CopyDeletes(OffsetMap* source, OffsetMap* dest);
michael@0 156
michael@0 157 std::string diffs_;
michael@0 158 MapOp pending_op_;
michael@0 159 uint32 pending_length_;
michael@0 160
michael@0 161 // Offsets in the ranges below correspond to each other, with A' = A + diff
michael@0 162 int next_diff_sub_;
michael@0 163 int current_lo_aoffset_;
michael@0 164 int current_hi_aoffset_;
michael@0 165 int current_lo_aprimeoffset_;
michael@0 166 int current_hi_aprimeoffset_;
michael@0 167 int current_diff_;
michael@0 168 int max_aoffset_;
michael@0 169 int max_aprimeoffset_;
michael@0 170 };
michael@0 171
michael@0 172 } // namespace CLD2
michael@0 173
michael@0 174 #endif // UTIL_UTF8_OFFSETMAP_H_
michael@0 175

mercurial