browser/components/translation/cld2/internal/offsetmap.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 // Copyright 2013 Google Inc. All Rights Reserved.
     2 //
     3 // Licensed under the Apache License, Version 2.0 (the "License");
     4 // you may not use this file except in compliance with the License.
     5 // You may obtain a copy of the License at
     6 //
     7 //     http://www.apache.org/licenses/LICENSE-2.0
     8 //
     9 // Unless required by applicable law or agreed to in writing, software
    10 // distributed under the License is distributed on an "AS IS" BASIS,
    11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12 // See the License for the specific language governing permissions and
    13 // limitations under the License.
    15 //
    16 // Author: dsites@google.com (Dick Sites)
    17 //
    19 #ifndef UTIL_UTF8_OFFSETMAP_H_
    20 #define UTIL_UTF8_OFFSETMAP_H_
    22 #include <string>                       // for string
    23 #include "integral_types.h"             // for uint32
    25 // ***************************** OffsetMap **************************
    26 //
    27 // An OffsetMap object is a container for a mapping from offsets in one text
    28 // buffer A' to offsets in another text buffer A. It is most useful when A' is
    29 // built from A via substitutions that occasionally do not preserve byte length.
    30 //
    31 // A series of operators are used to build the correspondence map, then
    32 // calls can be made to map an offset in A' to an offset in A, or vice versa.
    33 // The map starts with offset 0 in A corresponding to offset 0 in A'.
    34 // The mapping is then built sequentially, adding on byte ranges that are
    35 // identical in A and A', byte ranges that are inserted in A', and byte ranges
    36 // that are deleted from A. All bytes beyond those specified when building the
    37 // map are assumed to correspond, i.e. a Copy(infinity) is assumed at the
    38 // end of the map.
    39 //
    40 // The internal data structure records positions at which bytes are added or
    41 // deleted. Using the map is O(1) when increasing the A' or A offset
    42 // monotonically, and O(n) when accessing random offsets, where n is the
    43 // number of differences.
    44 //
    46 namespace CLD2 {
    48 class OffsetMap {
    49  public:
    50   // Constructor, destructor
    51   OffsetMap();
    52   ~OffsetMap();
    54   // Clear the map
    55   void Clear();
    57   // Add to  mapping from A to A', specifying how many next bytes correspond
    58   // in A and A'
    59   void Copy(int bytes);
    61   // Add to mapping from A to A', specifying how many next bytes are
    62   // inserted in A' while not advancing in A at all
    63   void Insert(int bytes);
    65   // Add to mapping from A to A', specifying how many next bytes are
    66   // deleted from A while not advancing in A' at all
    67   void Delete(int bytes);
    69   // Print map to file, for debugging
    70   void Printmap(const char* filename);
    72   // [Finish building map,] Re-position to offset 0
    73   // This call is optional; MapForward and MapBack finish building the map
    74   // if necessary
    75   void Reset();
    77   // Map an offset in A' to the corresponding offset in A
    78   int MapBack(int aprimeoffset);
    80   // Map an offset in A to the corresponding offset in A'
    81   int MapForward(int aoffset);
    83   // h = ComposeOffsetMap(g, f), where f is a map from A to A', g is
    84   // from A' to A'' and h is from A to A''.
    85   //
    86   // Note that g->MoveForward(f->MoveForward(aoffset)) always equals
    87   // to h->MoveForward(aoffset), while
    88   // f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals
    89   // to h->MoveBack(aprimeprimeoffset). This happens when deletion in
    90   // f and insertion in g are at the same place.  For example,
    91   //
    92   // A    1   2   3   4
    93   //      ^   |  ^    ^
    94   //      |   | /     |  f
    95   //      v   vv      v
    96   // A'   1'  2'      3'
    97   //      ^   ^^      ^
    98   //      |   | \     |  g
    99   //      v   |  v    v
   100   // A''  1'' 2'' 3'' 4''
   101   //
   102   // results in:
   103   //
   104   // A    1   2   3   4
   105   //      ^   ^\  ^   ^
   106   //      |   | \ |   |  h
   107   //      v   |  vv   v
   108   // A''  1'' 2'' 3'' 4''
   109   //
   110   // 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in
   111   // the latter figure.
   112   static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h);
   114   // For debugging only; writes to stderr
   115   void DumpWindow();
   117   // For testing only -- force a mapping
   118   void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset);
   120  private:
   121   enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP};
   123   void Flush();
   124   void FlushAll();
   125   void MaybeFlushAll();
   126   void Emit(MapOp op, int len);
   128   void SetLeft();
   129   void SetRight();
   131   // Back up over previous range, 1..5 bytes
   132   // Return subscript at the beginning of that. Pins at 0
   133   int Backup(int sub);
   135   // Parse next range, 1..5 bytes
   136   // Return subscript just off the end of that
   137   int ParseNext(int sub, MapOp* op, int* length);
   139   // Parse previous range, 1..5 bytes
   140   // Return current subscript
   141   int ParsePrevious(int sub, MapOp* op, int* length);
   143   void PrintPosition(const char* str);
   145   bool MoveRight();     // Returns true if OK
   146   bool MoveLeft();      // Returns true if OK
   147   void DumpString();
   149   // Copies insert operations from source to dest. Returns true if no
   150   // other operations are found.
   151   static bool CopyInserts(OffsetMap* source, OffsetMap* dest);
   153   // Copies delete operations from source to dest. Returns true if no other
   154   // operations are found.
   155   static bool CopyDeletes(OffsetMap* source, OffsetMap* dest);
   157   std::string diffs_;
   158   MapOp pending_op_;
   159   uint32 pending_length_;
   161   // Offsets in the ranges below correspond to each other, with A' = A + diff
   162   int next_diff_sub_;
   163   int current_lo_aoffset_;
   164   int current_hi_aoffset_;
   165   int current_lo_aprimeoffset_;
   166   int current_hi_aprimeoffset_;
   167   int current_diff_;
   168   int max_aoffset_;
   169   int max_aprimeoffset_;
   170 };
   172 }  // namespace CLD2
   174 #endif  // UTIL_UTF8_OFFSETMAP_H_

mercurial