browser/components/translation/cld2/internal/offsetmap.h

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:5ef6dd11a07b
1 // Copyright 2013 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 //
16 // Author: dsites@google.com (Dick Sites)
17 //
18
19 #ifndef UTIL_UTF8_OFFSETMAP_H_
20 #define UTIL_UTF8_OFFSETMAP_H_
21
22 #include <string> // for string
23 #include "integral_types.h" // for uint32
24
25 // ***************************** OffsetMap **************************
26 //
27 // An OffsetMap object is a container for a mapping from offsets in one text
28 // buffer A' to offsets in another text buffer A. It is most useful when A' is
29 // built from A via substitutions that occasionally do not preserve byte length.
30 //
31 // A series of operators are used to build the correspondence map, then
32 // calls can be made to map an offset in A' to an offset in A, or vice versa.
33 // The map starts with offset 0 in A corresponding to offset 0 in A'.
34 // The mapping is then built sequentially, adding on byte ranges that are
35 // identical in A and A', byte ranges that are inserted in A', and byte ranges
36 // that are deleted from A. All bytes beyond those specified when building the
37 // map are assumed to correspond, i.e. a Copy(infinity) is assumed at the
38 // end of the map.
39 //
40 // The internal data structure records positions at which bytes are added or
41 // deleted. Using the map is O(1) when increasing the A' or A offset
42 // monotonically, and O(n) when accessing random offsets, where n is the
43 // number of differences.
44 //
45
46 namespace CLD2 {
47
48 class OffsetMap {
49 public:
50 // Constructor, destructor
51 OffsetMap();
52 ~OffsetMap();
53
54 // Clear the map
55 void Clear();
56
57 // Add to mapping from A to A', specifying how many next bytes correspond
58 // in A and A'
59 void Copy(int bytes);
60
61 // Add to mapping from A to A', specifying how many next bytes are
62 // inserted in A' while not advancing in A at all
63 void Insert(int bytes);
64
65 // Add to mapping from A to A', specifying how many next bytes are
66 // deleted from A while not advancing in A' at all
67 void Delete(int bytes);
68
69 // Print map to file, for debugging
70 void Printmap(const char* filename);
71
72 // [Finish building map,] Re-position to offset 0
73 // This call is optional; MapForward and MapBack finish building the map
74 // if necessary
75 void Reset();
76
77 // Map an offset in A' to the corresponding offset in A
78 int MapBack(int aprimeoffset);
79
80 // Map an offset in A to the corresponding offset in A'
81 int MapForward(int aoffset);
82
83 // h = ComposeOffsetMap(g, f), where f is a map from A to A', g is
84 // from A' to A'' and h is from A to A''.
85 //
86 // Note that g->MoveForward(f->MoveForward(aoffset)) always equals
87 // to h->MoveForward(aoffset), while
88 // f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals
89 // to h->MoveBack(aprimeprimeoffset). This happens when deletion in
90 // f and insertion in g are at the same place. For example,
91 //
92 // A 1 2 3 4
93 // ^ | ^ ^
94 // | | / | f
95 // v vv v
96 // A' 1' 2' 3'
97 // ^ ^^ ^
98 // | | \ | g
99 // v | v v
100 // A'' 1'' 2'' 3'' 4''
101 //
102 // results in:
103 //
104 // A 1 2 3 4
105 // ^ ^\ ^ ^
106 // | | \ | | h
107 // v | vv v
108 // A'' 1'' 2'' 3'' 4''
109 //
110 // 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in
111 // the latter figure.
112 static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h);
113
114 // For debugging only; writes to stderr
115 void DumpWindow();
116
117 // For testing only -- force a mapping
118 void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset);
119
120 private:
121 enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP};
122
123 void Flush();
124 void FlushAll();
125 void MaybeFlushAll();
126 void Emit(MapOp op, int len);
127
128 void SetLeft();
129 void SetRight();
130
131 // Back up over previous range, 1..5 bytes
132 // Return subscript at the beginning of that. Pins at 0
133 int Backup(int sub);
134
135 // Parse next range, 1..5 bytes
136 // Return subscript just off the end of that
137 int ParseNext(int sub, MapOp* op, int* length);
138
139 // Parse previous range, 1..5 bytes
140 // Return current subscript
141 int ParsePrevious(int sub, MapOp* op, int* length);
142
143 void PrintPosition(const char* str);
144
145 bool MoveRight(); // Returns true if OK
146 bool MoveLeft(); // Returns true if OK
147 void DumpString();
148
149 // Copies insert operations from source to dest. Returns true if no
150 // other operations are found.
151 static bool CopyInserts(OffsetMap* source, OffsetMap* dest);
152
153 // Copies delete operations from source to dest. Returns true if no other
154 // operations are found.
155 static bool CopyDeletes(OffsetMap* source, OffsetMap* dest);
156
157 std::string diffs_;
158 MapOp pending_op_;
159 uint32 pending_length_;
160
161 // Offsets in the ranges below correspond to each other, with A' = A + diff
162 int next_diff_sub_;
163 int current_lo_aoffset_;
164 int current_hi_aoffset_;
165 int current_lo_aprimeoffset_;
166 int current_hi_aprimeoffset_;
167 int current_diff_;
168 int max_aoffset_;
169 int max_aprimeoffset_;
170 };
171
172 } // namespace CLD2
173
174 #endif // UTIL_UTF8_OFFSETMAP_H_
175

mercurial