Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | // Copyright 2013 Google Inc. All Rights Reserved. |
michael@0 | 2 | // |
michael@0 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
michael@0 | 4 | // you may not use this file except in compliance with the License. |
michael@0 | 5 | // You may obtain a copy of the License at |
michael@0 | 6 | // |
michael@0 | 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
michael@0 | 8 | // |
michael@0 | 9 | // Unless required by applicable law or agreed to in writing, software |
michael@0 | 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
michael@0 | 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
michael@0 | 12 | // See the License for the specific language governing permissions and |
michael@0 | 13 | // limitations under the License. |
michael@0 | 14 | |
michael@0 | 15 | // |
michael@0 | 16 | // Author: dsites@google.com (Dick Sites) |
michael@0 | 17 | // |
michael@0 | 18 | |
michael@0 | 19 | #ifndef UTIL_UTF8_OFFSETMAP_H_ |
michael@0 | 20 | #define UTIL_UTF8_OFFSETMAP_H_ |
michael@0 | 21 | |
michael@0 | 22 | #include <string> // for string |
michael@0 | 23 | #include "integral_types.h" // for uint32 |
michael@0 | 24 | |
michael@0 | 25 | // ***************************** OffsetMap ************************** |
michael@0 | 26 | // |
michael@0 | 27 | // An OffsetMap object is a container for a mapping from offsets in one text |
michael@0 | 28 | // buffer A' to offsets in another text buffer A. It is most useful when A' is |
michael@0 | 29 | // built from A via substitutions that occasionally do not preserve byte length. |
michael@0 | 30 | // |
michael@0 | 31 | // A series of operators are used to build the correspondence map, then |
michael@0 | 32 | // calls can be made to map an offset in A' to an offset in A, or vice versa. |
michael@0 | 33 | // The map starts with offset 0 in A corresponding to offset 0 in A'. |
michael@0 | 34 | // The mapping is then built sequentially, adding on byte ranges that are |
michael@0 | 35 | // identical in A and A', byte ranges that are inserted in A', and byte ranges |
michael@0 | 36 | // that are deleted from A. All bytes beyond those specified when building the |
michael@0 | 37 | // map are assumed to correspond, i.e. a Copy(infinity) is assumed at the |
michael@0 | 38 | // end of the map. |
michael@0 | 39 | // |
michael@0 | 40 | // The internal data structure records positions at which bytes are added or |
michael@0 | 41 | // deleted. Using the map is O(1) when increasing the A' or A offset |
michael@0 | 42 | // monotonically, and O(n) when accessing random offsets, where n is the |
michael@0 | 43 | // number of differences. |
michael@0 | 44 | // |
michael@0 | 45 | |
michael@0 | 46 | namespace CLD2 { |
michael@0 | 47 | |
michael@0 | 48 | class OffsetMap { |
michael@0 | 49 | public: |
michael@0 | 50 | // Constructor, destructor |
michael@0 | 51 | OffsetMap(); |
michael@0 | 52 | ~OffsetMap(); |
michael@0 | 53 | |
michael@0 | 54 | // Clear the map |
michael@0 | 55 | void Clear(); |
michael@0 | 56 | |
michael@0 | 57 | // Add to mapping from A to A', specifying how many next bytes correspond |
michael@0 | 58 | // in A and A' |
michael@0 | 59 | void Copy(int bytes); |
michael@0 | 60 | |
michael@0 | 61 | // Add to mapping from A to A', specifying how many next bytes are |
michael@0 | 62 | // inserted in A' while not advancing in A at all |
michael@0 | 63 | void Insert(int bytes); |
michael@0 | 64 | |
michael@0 | 65 | // Add to mapping from A to A', specifying how many next bytes are |
michael@0 | 66 | // deleted from A while not advancing in A' at all |
michael@0 | 67 | void Delete(int bytes); |
michael@0 | 68 | |
michael@0 | 69 | // Print map to file, for debugging |
michael@0 | 70 | void Printmap(const char* filename); |
michael@0 | 71 | |
michael@0 | 72 | // [Finish building map,] Re-position to offset 0 |
michael@0 | 73 | // This call is optional; MapForward and MapBack finish building the map |
michael@0 | 74 | // if necessary |
michael@0 | 75 | void Reset(); |
michael@0 | 76 | |
michael@0 | 77 | // Map an offset in A' to the corresponding offset in A |
michael@0 | 78 | int MapBack(int aprimeoffset); |
michael@0 | 79 | |
michael@0 | 80 | // Map an offset in A to the corresponding offset in A' |
michael@0 | 81 | int MapForward(int aoffset); |
michael@0 | 82 | |
michael@0 | 83 | // h = ComposeOffsetMap(g, f), where f is a map from A to A', g is |
michael@0 | 84 | // from A' to A'' and h is from A to A''. |
michael@0 | 85 | // |
michael@0 | 86 | // Note that g->MoveForward(f->MoveForward(aoffset)) always equals |
michael@0 | 87 | // to h->MoveForward(aoffset), while |
michael@0 | 88 | // f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals |
michael@0 | 89 | // to h->MoveBack(aprimeprimeoffset). This happens when deletion in |
michael@0 | 90 | // f and insertion in g are at the same place. For example, |
michael@0 | 91 | // |
michael@0 | 92 | // A 1 2 3 4 |
michael@0 | 93 | // ^ | ^ ^ |
michael@0 | 94 | // | | / | f |
michael@0 | 95 | // v vv v |
michael@0 | 96 | // A' 1' 2' 3' |
michael@0 | 97 | // ^ ^^ ^ |
michael@0 | 98 | // | | \ | g |
michael@0 | 99 | // v | v v |
michael@0 | 100 | // A'' 1'' 2'' 3'' 4'' |
michael@0 | 101 | // |
michael@0 | 102 | // results in: |
michael@0 | 103 | // |
michael@0 | 104 | // A 1 2 3 4 |
michael@0 | 105 | // ^ ^\ ^ ^ |
michael@0 | 106 | // | | \ | | h |
michael@0 | 107 | // v | vv v |
michael@0 | 108 | // A'' 1'' 2'' 3'' 4'' |
michael@0 | 109 | // |
michael@0 | 110 | // 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in |
michael@0 | 111 | // the latter figure. |
michael@0 | 112 | static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h); |
michael@0 | 113 | |
michael@0 | 114 | // For debugging only; writes to stderr |
michael@0 | 115 | void DumpWindow(); |
michael@0 | 116 | |
michael@0 | 117 | // For testing only -- force a mapping |
michael@0 | 118 | void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset); |
michael@0 | 119 | |
michael@0 | 120 | private: |
michael@0 | 121 | enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP}; |
michael@0 | 122 | |
michael@0 | 123 | void Flush(); |
michael@0 | 124 | void FlushAll(); |
michael@0 | 125 | void MaybeFlushAll(); |
michael@0 | 126 | void Emit(MapOp op, int len); |
michael@0 | 127 | |
michael@0 | 128 | void SetLeft(); |
michael@0 | 129 | void SetRight(); |
michael@0 | 130 | |
michael@0 | 131 | // Back up over previous range, 1..5 bytes |
michael@0 | 132 | // Return subscript at the beginning of that. Pins at 0 |
michael@0 | 133 | int Backup(int sub); |
michael@0 | 134 | |
michael@0 | 135 | // Parse next range, 1..5 bytes |
michael@0 | 136 | // Return subscript just off the end of that |
michael@0 | 137 | int ParseNext(int sub, MapOp* op, int* length); |
michael@0 | 138 | |
michael@0 | 139 | // Parse previous range, 1..5 bytes |
michael@0 | 140 | // Return current subscript |
michael@0 | 141 | int ParsePrevious(int sub, MapOp* op, int* length); |
michael@0 | 142 | |
michael@0 | 143 | void PrintPosition(const char* str); |
michael@0 | 144 | |
michael@0 | 145 | bool MoveRight(); // Returns true if OK |
michael@0 | 146 | bool MoveLeft(); // Returns true if OK |
michael@0 | 147 | void DumpString(); |
michael@0 | 148 | |
michael@0 | 149 | // Copies insert operations from source to dest. Returns true if no |
michael@0 | 150 | // other operations are found. |
michael@0 | 151 | static bool CopyInserts(OffsetMap* source, OffsetMap* dest); |
michael@0 | 152 | |
michael@0 | 153 | // Copies delete operations from source to dest. Returns true if no other |
michael@0 | 154 | // operations are found. |
michael@0 | 155 | static bool CopyDeletes(OffsetMap* source, OffsetMap* dest); |
michael@0 | 156 | |
michael@0 | 157 | std::string diffs_; |
michael@0 | 158 | MapOp pending_op_; |
michael@0 | 159 | uint32 pending_length_; |
michael@0 | 160 | |
michael@0 | 161 | // Offsets in the ranges below correspond to each other, with A' = A + diff |
michael@0 | 162 | int next_diff_sub_; |
michael@0 | 163 | int current_lo_aoffset_; |
michael@0 | 164 | int current_hi_aoffset_; |
michael@0 | 165 | int current_lo_aprimeoffset_; |
michael@0 | 166 | int current_hi_aprimeoffset_; |
michael@0 | 167 | int current_diff_; |
michael@0 | 168 | int max_aoffset_; |
michael@0 | 169 | int max_aprimeoffset_; |
michael@0 | 170 | }; |
michael@0 | 171 | |
michael@0 | 172 | } // namespace CLD2 |
michael@0 | 173 | |
michael@0 | 174 | #endif // UTIL_UTF8_OFFSETMAP_H_ |
michael@0 | 175 |