|
1 // Copyright 2013 Google Inc. All Rights Reserved. |
|
2 // |
|
3 // Licensed under the Apache License, Version 2.0 (the "License"); |
|
4 // you may not use this file except in compliance with the License. |
|
5 // You may obtain a copy of the License at |
|
6 // |
|
7 // http://www.apache.org/licenses/LICENSE-2.0 |
|
8 // |
|
9 // Unless required by applicable law or agreed to in writing, software |
|
10 // distributed under the License is distributed on an "AS IS" BASIS, |
|
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
12 // See the License for the specific language governing permissions and |
|
13 // limitations under the License. |
|
14 |
|
15 // |
|
16 // Author: dsites@google.com (Dick Sites) |
|
17 // |
|
18 |
|
19 #ifndef UTIL_UTF8_OFFSETMAP_H_ |
|
20 #define UTIL_UTF8_OFFSETMAP_H_ |
|
21 |
|
22 #include <string> // for string |
|
23 #include "integral_types.h" // for uint32 |
|
24 |
|
25 // ***************************** OffsetMap ************************** |
|
26 // |
|
27 // An OffsetMap object is a container for a mapping from offsets in one text |
|
28 // buffer A' to offsets in another text buffer A. It is most useful when A' is |
|
29 // built from A via substitutions that occasionally do not preserve byte length. |
|
30 // |
|
31 // A series of operators are used to build the correspondence map, then |
|
32 // calls can be made to map an offset in A' to an offset in A, or vice versa. |
|
33 // The map starts with offset 0 in A corresponding to offset 0 in A'. |
|
34 // The mapping is then built sequentially, adding on byte ranges that are |
|
35 // identical in A and A', byte ranges that are inserted in A', and byte ranges |
|
36 // that are deleted from A. All bytes beyond those specified when building the |
|
37 // map are assumed to correspond, i.e. a Copy(infinity) is assumed at the |
|
38 // end of the map. |
|
39 // |
|
40 // The internal data structure records positions at which bytes are added or |
|
41 // deleted. Using the map is O(1) when increasing the A' or A offset |
|
42 // monotonically, and O(n) when accessing random offsets, where n is the |
|
43 // number of differences. |
|
44 // |
|
45 |
|
46 namespace CLD2 { |
|
47 |
|
48 class OffsetMap { |
|
49 public: |
|
50 // Constructor, destructor |
|
51 OffsetMap(); |
|
52 ~OffsetMap(); |
|
53 |
|
54 // Clear the map |
|
55 void Clear(); |
|
56 |
|
57 // Add to mapping from A to A', specifying how many next bytes correspond |
|
58 // in A and A' |
|
59 void Copy(int bytes); |
|
60 |
|
61 // Add to mapping from A to A', specifying how many next bytes are |
|
62 // inserted in A' while not advancing in A at all |
|
63 void Insert(int bytes); |
|
64 |
|
65 // Add to mapping from A to A', specifying how many next bytes are |
|
66 // deleted from A while not advancing in A' at all |
|
67 void Delete(int bytes); |
|
68 |
|
69 // Print map to file, for debugging |
|
70 void Printmap(const char* filename); |
|
71 |
|
72 // [Finish building map,] Re-position to offset 0 |
|
73 // This call is optional; MapForward and MapBack finish building the map |
|
74 // if necessary |
|
75 void Reset(); |
|
76 |
|
77 // Map an offset in A' to the corresponding offset in A |
|
78 int MapBack(int aprimeoffset); |
|
79 |
|
80 // Map an offset in A to the corresponding offset in A' |
|
81 int MapForward(int aoffset); |
|
82 |
|
83 // h = ComposeOffsetMap(g, f), where f is a map from A to A', g is |
|
84 // from A' to A'' and h is from A to A''. |
|
85 // |
|
86 // Note that g->MoveForward(f->MoveForward(aoffset)) always equals |
|
87 // to h->MoveForward(aoffset), while |
|
88 // f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals |
|
89 // to h->MoveBack(aprimeprimeoffset). This happens when deletion in |
|
90 // f and insertion in g are at the same place. For example, |
|
91 // |
|
92 // A 1 2 3 4 |
|
93 // ^ | ^ ^ |
|
94 // | | / | f |
|
95 // v vv v |
|
96 // A' 1' 2' 3' |
|
97 // ^ ^^ ^ |
|
98 // | | \ | g |
|
99 // v | v v |
|
100 // A'' 1'' 2'' 3'' 4'' |
|
101 // |
|
102 // results in: |
|
103 // |
|
104 // A 1 2 3 4 |
|
105 // ^ ^\ ^ ^ |
|
106 // | | \ | | h |
|
107 // v | vv v |
|
108 // A'' 1'' 2'' 3'' 4'' |
|
109 // |
|
110 // 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in |
|
111 // the latter figure. |
|
112 static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h); |
|
113 |
|
114 // For debugging only; writes to stderr |
|
115 void DumpWindow(); |
|
116 |
|
117 // For testing only -- force a mapping |
|
118 void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset); |
|
119 |
|
120 private: |
|
121 enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP}; |
|
122 |
|
123 void Flush(); |
|
124 void FlushAll(); |
|
125 void MaybeFlushAll(); |
|
126 void Emit(MapOp op, int len); |
|
127 |
|
128 void SetLeft(); |
|
129 void SetRight(); |
|
130 |
|
131 // Back up over previous range, 1..5 bytes |
|
132 // Return subscript at the beginning of that. Pins at 0 |
|
133 int Backup(int sub); |
|
134 |
|
135 // Parse next range, 1..5 bytes |
|
136 // Return subscript just off the end of that |
|
137 int ParseNext(int sub, MapOp* op, int* length); |
|
138 |
|
139 // Parse previous range, 1..5 bytes |
|
140 // Return current subscript |
|
141 int ParsePrevious(int sub, MapOp* op, int* length); |
|
142 |
|
143 void PrintPosition(const char* str); |
|
144 |
|
145 bool MoveRight(); // Returns true if OK |
|
146 bool MoveLeft(); // Returns true if OK |
|
147 void DumpString(); |
|
148 |
|
149 // Copies insert operations from source to dest. Returns true if no |
|
150 // other operations are found. |
|
151 static bool CopyInserts(OffsetMap* source, OffsetMap* dest); |
|
152 |
|
153 // Copies delete operations from source to dest. Returns true if no other |
|
154 // operations are found. |
|
155 static bool CopyDeletes(OffsetMap* source, OffsetMap* dest); |
|
156 |
|
157 std::string diffs_; |
|
158 MapOp pending_op_; |
|
159 uint32 pending_length_; |
|
160 |
|
161 // Offsets in the ranges below correspond to each other, with A' = A + diff |
|
162 int next_diff_sub_; |
|
163 int current_lo_aoffset_; |
|
164 int current_hi_aoffset_; |
|
165 int current_lo_aprimeoffset_; |
|
166 int current_hi_aprimeoffset_; |
|
167 int current_diff_; |
|
168 int max_aoffset_; |
|
169 int max_aprimeoffset_; |
|
170 }; |
|
171 |
|
172 } // namespace CLD2 |
|
173 |
|
174 #endif // UTIL_UTF8_OFFSETMAP_H_ |
|
175 |