browser/components/translation/cld2/internal/offsetmap.cc

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 // Copyright 2013 Google Inc. All Rights Reserved.
     2 //
     3 // Licensed under the Apache License, Version 2.0 (the "License");
     4 // you may not use this file except in compliance with the License.
     5 // You may obtain a copy of the License at
     6 //
     7 //     http://www.apache.org/licenses/LICENSE-2.0
     8 //
     9 // Unless required by applicable law or agreed to in writing, software
    10 // distributed under the License is distributed on an "AS IS" BASIS,
    11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12 // See the License for the specific language governing permissions and
    13 // limitations under the License.
    15 //
    16 // Author: dsites@google.com (Dick Sites)
    17 //
    18 //
    20 #include "offsetmap.h"
    22 #include <string.h>                     // for strcmp
    23 #include <stdio.h>                      // for fprintf, stderr, fclose, etc
    24 #include <algorithm>                    // for min
    26 using namespace std;
    28 namespace CLD2 {
    30 // Constructor, destructor
    31 OffsetMap::OffsetMap() {
    32   Clear();
    33 }
    35 OffsetMap::~OffsetMap() {
    36 }
    38 // Clear the map
    39 // After:
    40 //   next_diff_sub_ is 0
    41 //   Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1]
    42 //   which is a fake range of width 0 mapping 0=>0
    43 void OffsetMap::Clear() {
    44   diffs_.clear();
    45   pending_op_ = COPY_OP;
    46   pending_length_ = 0;
    47   next_diff_sub_ = 0;
    48   current_lo_aoffset_ = 0;
    49   current_hi_aoffset_ = 0;
    50   current_lo_aprimeoffset_ = 0;
    51   current_hi_aprimeoffset_ = 0;
    52   current_diff_ = 0;
    53   max_aoffset_ = 0;           // Largest seen so far
    54   max_aprimeoffset_ = 0;      // Largest seen so far
    55 }
    57 static inline char OpPart(const char c) {
    58   return (c >> 6) & 3;
    59 }
    60 static inline char LenPart(const char c) {
    61   return c & 0x3f;
    62 }
    64 // Print map to file, for debugging
    65 void OffsetMap::Printmap(const char* filename) {
    66   FILE* fout;
    67   bool needs_close = false;
    68   if (strcmp(filename, "stdout") == 0) {
    69     fout = stdout;
    70   } else if (strcmp(filename, "stderr") == 0) {
    71     fout = stderr;
    72   } else {
    73     fout = fopen(filename, "w");
    74     needs_close = true;
    75   }
    76   if (fout == NULL) {
    77     fprintf(stderr, "%s did not open\n", filename);
    78     return;
    79   }
    81   Flush();    // Make sure any pending entry gets printed
    82   fprintf(fout, "Offsetmap: %ld bytes\n", diffs_.size());
    83   for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
    84     fprintf(fout, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
    85     if ((i % 20) == 19) {fprintf(fout, "\n");}
    86   }
    87   fprintf(fout, "\n");
    88   if (needs_close) {
    89     fclose(fout);
    90   }
    91 }
    93 // Reset to offset 0
    94 void OffsetMap::Reset() {
    95   MaybeFlushAll();
    97   next_diff_sub_ = 0;
    98   current_lo_aoffset_ = 0;
    99   current_hi_aoffset_ = 0;
   100   current_lo_aprimeoffset_ = 0;
   101   current_hi_aprimeoffset_ = 0;
   102   current_diff_ = 0;
   103 }
   105 // Add to  mapping from A to A', specifying how many next bytes are
   106 // identical in A and A'
   107 void OffsetMap::Copy(int bytes) {
   108   if (bytes == 0) {return;}
   109   max_aoffset_ += bytes;           // Largest seen so far
   110   max_aprimeoffset_ += bytes;      // Largest seen so far
   111   if (pending_op_ == COPY_OP) {
   112     pending_length_ += bytes;
   113   } else {
   114     Flush();
   115     pending_op_ = COPY_OP;
   116     pending_length_ = bytes;
   117   }
   118 }
   120 // Add to mapping from A to A', specifying how many next bytes are
   121 // inserted in A' while not advancing in A at all
   122 void OffsetMap::Insert(int bytes){
   123   if (bytes == 0) {return;}
   124   max_aprimeoffset_ += bytes;      // Largest seen so far
   125   if (pending_op_ == INSERT_OP) {
   126     pending_length_ += bytes;
   127   } else if ((bytes == 1) &&
   128              (pending_op_ == DELETE_OP) && (pending_length_ == 1)) {
   129     // Special-case exactly delete(1) insert(1) +> copy(1);
   130     // all others backmap inserts to after deletes
   131     pending_op_ = COPY_OP;
   132   } else {
   133     Flush();
   134     pending_op_ = INSERT_OP;
   135     pending_length_ = bytes;
   136   }
   137 }
   139 // Add to mapping from A to A', specifying how many next bytes are
   140 // deleted from A while not advancing in A' at all
   141 void OffsetMap::Delete(int bytes){
   142   if (bytes == 0) {return;}
   143   max_aoffset_ += bytes;           // Largest seen so far
   144   if (pending_op_ == DELETE_OP) {
   145     pending_length_ += bytes;
   146     } else if ((bytes == 1) &&
   147                (pending_op_ == INSERT_OP) && (pending_length_ == 1)) {
   148       // Special-case exactly insert(1) delete(1) => copy(1);
   149       // all others backmap deletes to after insertss
   150       pending_op_ = COPY_OP;
   151   } else {
   152     Flush();
   153     pending_op_ = DELETE_OP;
   154     pending_length_ = bytes;
   155   }
   156 }
   158 void OffsetMap::Flush() {
   159   if (pending_length_ == 0) {
   160     return;
   161   }
   162   // We may be emitting a copy op just after a copy op because +1 -1 cancelled
   163   // inbetween. If the lengths don't need a prefix byte, combine them
   164   if ((pending_op_ == COPY_OP) && !diffs_.empty()) {
   165     char c = diffs_[diffs_.size() - 1];
   166     MapOp prior_op = static_cast<MapOp>(OpPart(c));
   167     int prior_len = LenPart(c);
   168     if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) {
   169       diffs_[diffs_.size() - 1] += pending_length_;
   170       pending_length_ = 0;
   171       return;
   172     }
   173   }
   174   if (pending_length_ > 0x3f) {
   175     bool non_zero_emitted = false;
   176     for (int shift = 30; shift > 0; shift -= 6) {
   177       int prefix = (pending_length_ >> shift) & 0x3f;
   178       if ((prefix > 0) || non_zero_emitted) {
   179         Emit(PREFIX_OP, prefix);
   180         non_zero_emitted = true;
   181       }
   182     }
   183   }
   184   Emit(pending_op_, pending_length_ & 0x3f);
   185   pending_length_ = 0;
   186 }
   189 // Add one more entry to copy one byte off the end, then flush
   190 void OffsetMap::FlushAll() {
   191   Copy(1);
   192   Flush();
   193 }
   195 // Flush all if necessary
   196 void OffsetMap::MaybeFlushAll() {
   197   if ((0 < pending_length_) || diffs_.empty()) {
   198     FlushAll();
   199   }
   200 }
   202 // Len may be 0, for example as the low piece of length=64
   203 void OffsetMap::Emit(MapOp op, int len) {
   204   char c = (static_cast<char>(op) << 6) | (len & 0x3f);
   205   diffs_.push_back(c);
   206 }
   208 void OffsetMap::DumpString() {
   209   for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
   210     fprintf(stderr, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
   211   }
   212   fprintf(stderr, "\n");
   214   // Print running table of correspondences
   215   fprintf(stderr, "       op      A =>  A'     (A forward-maps to A')\n");
   216   int aoffset = 0;
   217   int aprimeoffset = 0;
   218   int length = 0;
   219   for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
   220     char c = diffs_[i];
   221     MapOp op = static_cast<MapOp>(OpPart(c));
   222     int len = LenPart(c);
   223     length = (length << 6) + len;
   224     if (op == COPY_OP) {
   225       aoffset += length;
   226       aprimeoffset += length;
   227       length = 0;
   228     } else if (op == INSERT_OP) {
   229       aoffset += 0;
   230       aprimeoffset += length;
   231       length = 0;
   232     } else if (op == DELETE_OP) {
   233       aoffset += length;
   234       aprimeoffset += 0;
   235       length = 0;
   236     } else {              // (op == PREFIX_OP)
   237       // Do nothing else
   238     }
   239     fprintf(stderr, "[%3d] %c%02d %6d %6d%s\n",
   240             i, "&=+-"[op], len,
   241             aoffset, aprimeoffset,
   242             (next_diff_sub_ == i) ? " <==next_diff_sub_" : "");
   244   }
   245   fprintf(stderr, "\n");
   246 }
   248 void OffsetMap::DumpWindow() {
   249   fprintf(stderr, "DumpWindow(A => A'): max_aoffset_ = %d, "
   250                   "max_aprimeoffset_ = %d, next_diff_sub_ = %d<br>\n",
   251           max_aoffset_, max_aprimeoffset_, next_diff_sub_);
   252   fprintf(stderr, "A  [%u..%u)\n",
   253           current_lo_aoffset_, current_hi_aoffset_);
   254   fprintf(stderr, "A' [%u..%u)\n",
   255           current_lo_aprimeoffset_, current_hi_aprimeoffset_);
   256   fprintf(stderr, "  diff = %d\n", current_diff_);
   257   DumpString();
   258 }
   260 //----------------------------------------------------------------------------//
   261 // The guts of the 2013 design                                                //
   262 // If there are three ranges a b c in  diffs_, we can be in one of five       //
   263 // states: LEFT of a, in ranges a b c, or RIGHT of c                          //
   264 // In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_    //
   265 // position next_diff_sub_                                                    //
   266 // There also are mapping constants max_aoffset_ and max_aprimeoffset_        //
   267 // If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0                       //
   268 // If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and            //
   269 //   next_diff_sub_=diffs_.size()                                             //
   270 // Otherwise, at least one of A[) and A'[) is non-empty and the first bytes   //
   271 //   correspond to each other. If range i is active, next_diff_sub_ is at     //
   272 //   the first byte of range i+1. Because of the length-prefix operator,      //
   273 //   an individual range item in diffs_ may be multiple bytes                 //
   274 // In all cases aprimeoffset = aoffset + current_diff_                        //
   275 //   i.e. current_diff_ = aprimeoffset - aoffset                              //
   276 //                                                                            //
   277 // In the degenerate case of diffs_.empty(), there are only two states        //
   278 //   LEFT and RIGHT and the mapping is the identity mapping.                  //
   279 // The initial state is LEFT.                                                 //
   280 // It is an error to move left into LEFT or right into RIGHT, but the code    //
   281 //   below is robust in these cases.                                          //
   282 //----------------------------------------------------------------------------//
   284 void OffsetMap::SetLeft() {
   285    current_lo_aoffset_ = 0;
   286    current_hi_aoffset_ = 0;
   287    current_lo_aprimeoffset_ = 0;
   288    current_hi_aprimeoffset_ = 0;
   289    current_diff_ = 0;
   290    next_diff_sub_ = 0;
   291 }
   293 void OffsetMap::SetRight() {
   294    current_lo_aoffset_ = max_aoffset_;
   295    current_hi_aoffset_ = max_aoffset_;
   296    current_lo_aprimeoffset_ = max_aprimeoffset_;
   297    current_hi_aprimeoffset_ = max_aprimeoffset_;
   298    current_diff_ = max_aprimeoffset_ - max_aoffset_;
   299    next_diff_sub_ = 0;
   300 }
   302 // Back up over previous range, 1..5 bytes
   303 // Return subscript at the beginning of that. Pins at 0
   304 int OffsetMap::Backup(int sub) {
   305   if (sub <= 0) {return 0;}
   306   --sub;
   307   while ((0 < sub) &&
   308          (static_cast<MapOp>(OpPart(diffs_[sub - 1]) == PREFIX_OP))) {
   309     --sub;
   310   }
   311   return sub;
   312 }
   314 // Parse next range, 1..5 bytes
   315 // Return subscript just off the end of that
   316 int OffsetMap::ParseNext(int sub, MapOp* op, int* length) {
   317    *op = PREFIX_OP;
   318    *length = 0;
   319    char c;
   320    while ((sub < static_cast<int>(diffs_.size())) && (*op == PREFIX_OP)) {
   321      c = diffs_[sub++];
   322      *op = static_cast<MapOp>(OpPart(c));
   323      int len = LenPart(c);
   324      *length = (*length << 6) + len;
   325    }
   326    // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
   327    // Mal-formed can include a trailing prefix byte with no following op
   328    return sub;
   329 }
   331 // Parse previous range, 1..5 bytes
   332 // Return current subscript
   333 int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) {
   334   sub = Backup(sub);
   335   return ParseNext(sub, op, length);
   336 }
   338 // Quick debugging dump; does not parse multi-byte items, so just length & 0x3f
   339 void OffsetMap::PrintPosition(const char* str) {
   340   MapOp op = PREFIX_OP;
   341   int length = 0;
   342   if ((0 < next_diff_sub_) && (next_diff_sub_ <= static_cast<int>(diffs_.size()))) {
   343     op = static_cast<MapOp>(OpPart(diffs_[next_diff_sub_ - 1]));
   344     length = LenPart(diffs_[next_diff_sub_ - 1]);
   345   }
   346   fprintf(stderr, "%s[%d] %c%02d = A[%d..%d) ==> A'[%d..%d)\n",
   347           str,
   348           next_diff_sub_, "&=+-"[op], length,
   349           current_lo_aoffset_, current_hi_aoffset_,
   350           current_lo_aprimeoffset_, current_hi_aprimeoffset_);
   351 }
   353 // Move active window one range to the right
   354 // Return true if move was OK
   355 bool OffsetMap::MoveRight() {
   356   // If at last range or RIGHT, set to RIGHT, return error
   357   if (next_diff_sub_ >= static_cast<int>(diffs_.size())) {
   358     SetRight();
   359     return false;
   360   }
   361   // Actually OK to move right
   362   MapOp op;
   363   int length;
   364   bool retval = true;
   365   // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
   366   next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length);
   368   current_lo_aoffset_ = current_hi_aoffset_;
   369   current_lo_aprimeoffset_ = current_hi_aprimeoffset_;
   370   if (op == COPY_OP) {
   371     current_hi_aoffset_ = current_lo_aoffset_ + length;
   372     current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
   373   } else if (op == INSERT_OP) {
   374     current_hi_aoffset_ = current_lo_aoffset_ + 0;
   375     current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
   376   } else if (op == DELETE_OP) {
   377     current_hi_aoffset_ = current_lo_aoffset_ + length;
   378     current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0;
   379   } else {
   380     SetRight();
   381     retval = false;
   382   }
   383   current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
   384   return retval;
   385 }
   387 // Move active window one range to the left
   388 // Return true if move was OK
   389 bool OffsetMap::MoveLeft() {
   390   // If at first range or LEFT, set to LEFT, return error
   391   if (next_diff_sub_ <= 0) {
   392     SetLeft();
   393     return false;
   394   }
   395   // Back up over current active window
   396   next_diff_sub_ = Backup(next_diff_sub_);
   397   if (next_diff_sub_ <= 0) {
   398     SetLeft();
   399     return false;
   400   }
   401   // Actually OK to move left
   402   MapOp op;
   403   int length;
   404   bool retval = true;
   405   // If mal-formed or in LEFT, this will return with op = PREFIX_OP
   406   next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length);
   408   current_hi_aoffset_ = current_lo_aoffset_;
   409   current_hi_aprimeoffset_ = current_lo_aprimeoffset_;
   410   if (op == COPY_OP) {
   411     current_lo_aoffset_ = current_hi_aoffset_ - length;
   412     current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
   413   } else if (op == INSERT_OP) {
   414     current_lo_aoffset_ = current_hi_aoffset_ - 0;
   415     current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
   416   } else if (op == DELETE_OP) {
   417     current_lo_aoffset_ = current_hi_aoffset_ - length;
   418     current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0;
   419   } else {
   420     SetLeft();
   421     retval = false;
   422   }
   423   current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
   424   return true;
   425 }
   427 // Map an offset in A' to the corresponding offset in A
   428 int OffsetMap::MapBack(int aprimeoffset){
   429   MaybeFlushAll();
   430   if (aprimeoffset < 0) {return 0;}
   431   if (max_aprimeoffset_ <= aprimeoffset) {
   432      return (aprimeoffset - max_aprimeoffset_) + max_aoffset_;
   433   }
   435   // If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_,
   436   // use current mapping, else move window left/right
   437   bool ok = true;
   438   while (ok && (aprimeoffset < current_lo_aprimeoffset_)) {
   439     ok = MoveLeft();
   440   }
   441   while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) {
   442     ok = MoveRight();
   443   }
   444   // So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_
   446   int aoffset = aprimeoffset - current_diff_;
   447   if (aoffset >= current_hi_aoffset_) {
   448     // A' is in an insert region, all bytes of which backmap to A=hi_aoffset_
   449     aoffset = current_hi_aoffset_;
   450   }
   451   return aoffset;
   452 }
   454 // Map an offset in A to the corresponding offset in A'
   455 int OffsetMap::MapForward(int aoffset){
   456   MaybeFlushAll();
   457   if (aoffset < 0) {return 0;}
   458   if (max_aoffset_ <= aoffset) {
   459      return (aoffset - max_aoffset_) + max_aprimeoffset_;
   460   }
   462   // If current_lo_aoffset_ <= aoffset < current_hi_aoffset_,
   463   // use current mapping, else move window left/right
   464   bool ok = true;
   465   while (ok && (aoffset < current_lo_aoffset_)) {
   466     ok = MoveLeft();
   467   }
   468   while (ok && (current_hi_aoffset_ <= aoffset)) {
   469     ok = MoveRight();
   470   }
   472   int aprimeoffset = aoffset + current_diff_;
   473   if (aprimeoffset >= current_hi_aprimeoffset_) {
   474     // A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_
   475     aprimeoffset = current_hi_aprimeoffset_;
   476   }
   477   return aprimeoffset;
   478 }
   481 // static
   482 bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) {
   483   bool ok = true;
   484   while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
   485     ok = source->MoveRight();
   486     if (source->current_lo_aoffset_ != source->current_hi_aoffset_) {
   487       return false;
   488     }
   489     dest->Insert(
   490         source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_);
   491   }
   492   return true;
   493 }
   495 // static
   496 bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) {
   497   bool ok = true;
   498   while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
   499     ok = source->MoveRight();
   500     if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) {
   501       return false;
   502     }
   503     dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_);
   504   }
   505   return true;
   506 }
   508 // static
   509 void OffsetMap::ComposeOffsetMap(
   510     OffsetMap* g, OffsetMap* f, OffsetMap* h) {
   511   h->Clear();
   512   f->Reset();
   513   g->Reset();
   515   int lo = 0;
   516   for (;;) {
   517      // Consume delete operations in f. This moves A without moving
   518      // A' and A''.
   519      if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) {
   520        if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) {
   521           // fprintf(stderr,
   522           //         "ComposeOffsetMap ERROR, f is longer than g.<br>\n");
   523        }
   525        // FlushAll(), called by Reset(), MapForward() or MapBack(), has
   526        // added an extra COPY_OP to f and g, so this function has
   527        // composed an extra COPY_OP in h from those. To avoid
   528        // FlushAll() adds one more extra COPY_OP to h later, dispatch
   529        // Flush() right now.
   530        h->Flush();
   531        return;
   532      }
   534      // Consume insert operations in g. This moves A'' without moving A
   535      // and A'.
   536      if (lo >= f->current_hi_aprimeoffset_) {
   537        if (!CopyDeletes(f, h)) {
   538           // fprintf(stderr,
   539           //         "ComposeOffsetMap ERROR, g is longer than f.<br>\n");
   540        }
   541      }
   543      // Compose one operation which moves A' from lo to hi.
   544      int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_);
   545      if (f->current_lo_aoffset_ != f->current_hi_aoffset_ &&
   546          g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
   547        h->Copy(hi - lo);
   548      } else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) {
   549        h->Delete(hi - lo);
   550      } else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
   551        h->Insert(hi - lo);
   552      }
   554      lo = hi;
   555   }
   556 }
   558 // For testing only -- force a mapping
   559 void OffsetMap::StuffIt(const string& diffs,
   560                         int max_aoffset, int max_aprimeoffset) {
   561   Clear();
   562   diffs_ = diffs;
   563   max_aoffset_ = max_aoffset;
   564   max_aprimeoffset_ = max_aprimeoffset;
   565 }
   568 }  // namespace CLD2

mercurial