browser/components/translation/cld2/internal/offsetmap.cc

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/browser/components/translation/cld2/internal/offsetmap.cc	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,569 @@
     1.4 +// Copyright 2013 Google Inc. All Rights Reserved.
     1.5 +//
     1.6 +// Licensed under the Apache License, Version 2.0 (the "License");
     1.7 +// you may not use this file except in compliance with the License.
     1.8 +// You may obtain a copy of the License at
     1.9 +//
    1.10 +//     http://www.apache.org/licenses/LICENSE-2.0
    1.11 +//
    1.12 +// Unless required by applicable law or agreed to in writing, software
    1.13 +// distributed under the License is distributed on an "AS IS" BASIS,
    1.14 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    1.15 +// See the License for the specific language governing permissions and
    1.16 +// limitations under the License.
    1.17 +
    1.18 +//
    1.19 +// Author: dsites@google.com (Dick Sites)
    1.20 +//
    1.21 +//
    1.22 +
    1.23 +#include "offsetmap.h"
    1.24 +
    1.25 +#include <string.h>                     // for strcmp
    1.26 +#include <stdio.h>                      // for fprintf, stderr, fclose, etc
    1.27 +#include <algorithm>                    // for min
    1.28 +
    1.29 +using namespace std;
    1.30 +
    1.31 +namespace CLD2 {
    1.32 +
    1.33 +// Constructor, destructor
    1.34 +OffsetMap::OffsetMap() {
    1.35 +  Clear();
    1.36 +}
    1.37 +
    1.38 +OffsetMap::~OffsetMap() {
    1.39 +}
    1.40 +
    1.41 +// Clear the map
    1.42 +// After:
    1.43 +//   next_diff_sub_ is 0
    1.44 +//   Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1]
    1.45 +//   which is a fake range of width 0 mapping 0=>0
    1.46 +void OffsetMap::Clear() {
    1.47 +  diffs_.clear();
    1.48 +  pending_op_ = COPY_OP;
    1.49 +  pending_length_ = 0;
    1.50 +  next_diff_sub_ = 0;
    1.51 +  current_lo_aoffset_ = 0;
    1.52 +  current_hi_aoffset_ = 0;
    1.53 +  current_lo_aprimeoffset_ = 0;
    1.54 +  current_hi_aprimeoffset_ = 0;
    1.55 +  current_diff_ = 0;
    1.56 +  max_aoffset_ = 0;           // Largest seen so far
    1.57 +  max_aprimeoffset_ = 0;      // Largest seen so far
    1.58 +}
    1.59 +
    1.60 +static inline char OpPart(const char c) {
    1.61 +  return (c >> 6) & 3;
    1.62 +}
    1.63 +static inline char LenPart(const char c) {
    1.64 +  return c & 0x3f;
    1.65 +}
    1.66 +
    1.67 +// Print map to file, for debugging
    1.68 +void OffsetMap::Printmap(const char* filename) {
    1.69 +  FILE* fout;
    1.70 +  bool needs_close = false;
    1.71 +  if (strcmp(filename, "stdout") == 0) {
    1.72 +    fout = stdout;
    1.73 +  } else if (strcmp(filename, "stderr") == 0) {
    1.74 +    fout = stderr;
    1.75 +  } else {
    1.76 +    fout = fopen(filename, "w");
    1.77 +    needs_close = true;
    1.78 +  }
    1.79 +  if (fout == NULL) {
    1.80 +    fprintf(stderr, "%s did not open\n", filename);
    1.81 +    return;
    1.82 +  }
    1.83 +
    1.84 +  Flush();    // Make sure any pending entry gets printed
    1.85 +  fprintf(fout, "Offsetmap: %ld bytes\n", diffs_.size());
    1.86 +  for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
    1.87 +    fprintf(fout, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
    1.88 +    if ((i % 20) == 19) {fprintf(fout, "\n");}
    1.89 +  }
    1.90 +  fprintf(fout, "\n");
    1.91 +  if (needs_close) {
    1.92 +    fclose(fout);
    1.93 +  }
    1.94 +}
    1.95 +
    1.96 +// Reset to offset 0
    1.97 +void OffsetMap::Reset() {
    1.98 +  MaybeFlushAll();
    1.99 +
   1.100 +  next_diff_sub_ = 0;
   1.101 +  current_lo_aoffset_ = 0;
   1.102 +  current_hi_aoffset_ = 0;
   1.103 +  current_lo_aprimeoffset_ = 0;
   1.104 +  current_hi_aprimeoffset_ = 0;
   1.105 +  current_diff_ = 0;
   1.106 +}
   1.107 +
   1.108 +// Add to  mapping from A to A', specifying how many next bytes are
   1.109 +// identical in A and A'
   1.110 +void OffsetMap::Copy(int bytes) {
   1.111 +  if (bytes == 0) {return;}
   1.112 +  max_aoffset_ += bytes;           // Largest seen so far
   1.113 +  max_aprimeoffset_ += bytes;      // Largest seen so far
   1.114 +  if (pending_op_ == COPY_OP) {
   1.115 +    pending_length_ += bytes;
   1.116 +  } else {
   1.117 +    Flush();
   1.118 +    pending_op_ = COPY_OP;
   1.119 +    pending_length_ = bytes;
   1.120 +  }
   1.121 +}
   1.122 +
   1.123 +// Add to mapping from A to A', specifying how many next bytes are
   1.124 +// inserted in A' while not advancing in A at all
   1.125 +void OffsetMap::Insert(int bytes){
   1.126 +  if (bytes == 0) {return;}
   1.127 +  max_aprimeoffset_ += bytes;      // Largest seen so far
   1.128 +  if (pending_op_ == INSERT_OP) {
   1.129 +    pending_length_ += bytes;
   1.130 +  } else if ((bytes == 1) &&
   1.131 +             (pending_op_ == DELETE_OP) && (pending_length_ == 1)) {
   1.132 +    // Special-case exactly delete(1) insert(1) +> copy(1);
   1.133 +    // all others backmap inserts to after deletes
   1.134 +    pending_op_ = COPY_OP;
   1.135 +  } else {
   1.136 +    Flush();
   1.137 +    pending_op_ = INSERT_OP;
   1.138 +    pending_length_ = bytes;
   1.139 +  }
   1.140 +}
   1.141 +
   1.142 +// Add to mapping from A to A', specifying how many next bytes are
   1.143 +// deleted from A while not advancing in A' at all
   1.144 +void OffsetMap::Delete(int bytes){
   1.145 +  if (bytes == 0) {return;}
   1.146 +  max_aoffset_ += bytes;           // Largest seen so far
   1.147 +  if (pending_op_ == DELETE_OP) {
   1.148 +    pending_length_ += bytes;
   1.149 +    } else if ((bytes == 1) &&
   1.150 +               (pending_op_ == INSERT_OP) && (pending_length_ == 1)) {
   1.151 +      // Special-case exactly insert(1) delete(1) => copy(1);
   1.152 +      // all others backmap deletes to after insertss
   1.153 +      pending_op_ = COPY_OP;
   1.154 +  } else {
   1.155 +    Flush();
   1.156 +    pending_op_ = DELETE_OP;
   1.157 +    pending_length_ = bytes;
   1.158 +  }
   1.159 +}
   1.160 +
   1.161 +void OffsetMap::Flush() {
   1.162 +  if (pending_length_ == 0) {
   1.163 +    return;
   1.164 +  }
   1.165 +  // We may be emitting a copy op just after a copy op because +1 -1 cancelled
   1.166 +  // inbetween. If the lengths don't need a prefix byte, combine them
   1.167 +  if ((pending_op_ == COPY_OP) && !diffs_.empty()) {
   1.168 +    char c = diffs_[diffs_.size() - 1];
   1.169 +    MapOp prior_op = static_cast<MapOp>(OpPart(c));
   1.170 +    int prior_len = LenPart(c);
   1.171 +    if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) {
   1.172 +      diffs_[diffs_.size() - 1] += pending_length_;
   1.173 +      pending_length_ = 0;
   1.174 +      return;
   1.175 +    }
   1.176 +  }
   1.177 +  if (pending_length_ > 0x3f) {
   1.178 +    bool non_zero_emitted = false;
   1.179 +    for (int shift = 30; shift > 0; shift -= 6) {
   1.180 +      int prefix = (pending_length_ >> shift) & 0x3f;
   1.181 +      if ((prefix > 0) || non_zero_emitted) {
   1.182 +        Emit(PREFIX_OP, prefix);
   1.183 +        non_zero_emitted = true;
   1.184 +      }
   1.185 +    }
   1.186 +  }
   1.187 +  Emit(pending_op_, pending_length_ & 0x3f);
   1.188 +  pending_length_ = 0;
   1.189 +}
   1.190 +
   1.191 +
   1.192 +// Add one more entry to copy one byte off the end, then flush
   1.193 +void OffsetMap::FlushAll() {
   1.194 +  Copy(1);
   1.195 +  Flush();
   1.196 +}
   1.197 +
   1.198 +// Flush all if necessary
   1.199 +void OffsetMap::MaybeFlushAll() {
   1.200 +  if ((0 < pending_length_) || diffs_.empty()) {
   1.201 +    FlushAll();
   1.202 +  }
   1.203 +}
   1.204 +
   1.205 +// Len may be 0, for example as the low piece of length=64
   1.206 +void OffsetMap::Emit(MapOp op, int len) {
   1.207 +  char c = (static_cast<char>(op) << 6) | (len & 0x3f);
   1.208 +  diffs_.push_back(c);
   1.209 +}
   1.210 +
   1.211 +void OffsetMap::DumpString() {
   1.212 +  for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
   1.213 +    fprintf(stderr, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
   1.214 +  }
   1.215 +  fprintf(stderr, "\n");
   1.216 +
   1.217 +  // Print running table of correspondences
   1.218 +  fprintf(stderr, "       op      A =>  A'     (A forward-maps to A')\n");
   1.219 +  int aoffset = 0;
   1.220 +  int aprimeoffset = 0;
   1.221 +  int length = 0;
   1.222 +  for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
   1.223 +    char c = diffs_[i];
   1.224 +    MapOp op = static_cast<MapOp>(OpPart(c));
   1.225 +    int len = LenPart(c);
   1.226 +    length = (length << 6) + len;
   1.227 +    if (op == COPY_OP) {
   1.228 +      aoffset += length;
   1.229 +      aprimeoffset += length;
   1.230 +      length = 0;
   1.231 +    } else if (op == INSERT_OP) {
   1.232 +      aoffset += 0;
   1.233 +      aprimeoffset += length;
   1.234 +      length = 0;
   1.235 +    } else if (op == DELETE_OP) {
   1.236 +      aoffset += length;
   1.237 +      aprimeoffset += 0;
   1.238 +      length = 0;
   1.239 +    } else {              // (op == PREFIX_OP)
   1.240 +      // Do nothing else
   1.241 +    }
   1.242 +    fprintf(stderr, "[%3d] %c%02d %6d %6d%s\n",
   1.243 +            i, "&=+-"[op], len,
   1.244 +            aoffset, aprimeoffset,
   1.245 +            (next_diff_sub_ == i) ? " <==next_diff_sub_" : "");
   1.246 +
   1.247 +  }
   1.248 +  fprintf(stderr, "\n");
   1.249 +}
   1.250 +
   1.251 +void OffsetMap::DumpWindow() {
   1.252 +  fprintf(stderr, "DumpWindow(A => A'): max_aoffset_ = %d, "
   1.253 +                  "max_aprimeoffset_ = %d, next_diff_sub_ = %d<br>\n",
   1.254 +          max_aoffset_, max_aprimeoffset_, next_diff_sub_);
   1.255 +  fprintf(stderr, "A  [%u..%u)\n",
   1.256 +          current_lo_aoffset_, current_hi_aoffset_);
   1.257 +  fprintf(stderr, "A' [%u..%u)\n",
   1.258 +          current_lo_aprimeoffset_, current_hi_aprimeoffset_);
   1.259 +  fprintf(stderr, "  diff = %d\n", current_diff_);
   1.260 +  DumpString();
   1.261 +}
   1.262 +
   1.263 +//----------------------------------------------------------------------------//
   1.264 +// The guts of the 2013 design                                                //
   1.265 +// If there are three ranges a b c in  diffs_, we can be in one of five       //
   1.266 +// states: LEFT of a, in ranges a b c, or RIGHT of c                          //
   1.267 +// In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_    //
   1.268 +// position next_diff_sub_                                                    //
   1.269 +// There also are mapping constants max_aoffset_ and max_aprimeoffset_        //
   1.270 +// If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0                       //
   1.271 +// If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and            //
   1.272 +//   next_diff_sub_=diffs_.size()                                             //
   1.273 +// Otherwise, at least one of A[) and A'[) is non-empty and the first bytes   //
   1.274 +//   correspond to each other. If range i is active, next_diff_sub_ is at     //
   1.275 +//   the first byte of range i+1. Because of the length-prefix operator,      //
   1.276 +//   an individual range item in diffs_ may be multiple bytes                 //
   1.277 +// In all cases aprimeoffset = aoffset + current_diff_                        //
   1.278 +//   i.e. current_diff_ = aprimeoffset - aoffset                              //
   1.279 +//                                                                            //
   1.280 +// In the degenerate case of diffs_.empty(), there are only two states        //
   1.281 +//   LEFT and RIGHT and the mapping is the identity mapping.                  //
   1.282 +// The initial state is LEFT.                                                 //
   1.283 +// It is an error to move left into LEFT or right into RIGHT, but the code    //
   1.284 +//   below is robust in these cases.                                          //
   1.285 +//----------------------------------------------------------------------------//
   1.286 +
   1.287 +void OffsetMap::SetLeft() {
   1.288 +   current_lo_aoffset_ = 0;
   1.289 +   current_hi_aoffset_ = 0;
   1.290 +   current_lo_aprimeoffset_ = 0;
   1.291 +   current_hi_aprimeoffset_ = 0;
   1.292 +   current_diff_ = 0;
   1.293 +   next_diff_sub_ = 0;
   1.294 +}
   1.295 +
   1.296 +void OffsetMap::SetRight() {
   1.297 +   current_lo_aoffset_ = max_aoffset_;
   1.298 +   current_hi_aoffset_ = max_aoffset_;
   1.299 +   current_lo_aprimeoffset_ = max_aprimeoffset_;
   1.300 +   current_hi_aprimeoffset_ = max_aprimeoffset_;
   1.301 +   current_diff_ = max_aprimeoffset_ - max_aoffset_;
   1.302 +   next_diff_sub_ = 0;
   1.303 +}
   1.304 +
   1.305 +// Back up over previous range, 1..5 bytes
   1.306 +// Return subscript at the beginning of that. Pins at 0
   1.307 +int OffsetMap::Backup(int sub) {
   1.308 +  if (sub <= 0) {return 0;}
   1.309 +  --sub;
   1.310 +  while ((0 < sub) &&
   1.311 +         (static_cast<MapOp>(OpPart(diffs_[sub - 1]) == PREFIX_OP))) {
   1.312 +    --sub;
   1.313 +  }
   1.314 +  return sub;
   1.315 +}
   1.316 +
   1.317 +// Parse next range, 1..5 bytes
   1.318 +// Return subscript just off the end of that
   1.319 +int OffsetMap::ParseNext(int sub, MapOp* op, int* length) {
   1.320 +   *op = PREFIX_OP;
   1.321 +   *length = 0;
   1.322 +   char c;
   1.323 +   while ((sub < static_cast<int>(diffs_.size())) && (*op == PREFIX_OP)) {
   1.324 +     c = diffs_[sub++];
   1.325 +     *op = static_cast<MapOp>(OpPart(c));
   1.326 +     int len = LenPart(c);
   1.327 +     *length = (*length << 6) + len;
   1.328 +   }
   1.329 +   // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
   1.330 +   // Mal-formed can include a trailing prefix byte with no following op
   1.331 +   return sub;
   1.332 +}
   1.333 +
   1.334 +// Parse previous range, 1..5 bytes
   1.335 +// Return current subscript
   1.336 +int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) {
   1.337 +  sub = Backup(sub);
   1.338 +  return ParseNext(sub, op, length);
   1.339 +}
   1.340 +
   1.341 +// Quick debugging dump; does not parse multi-byte items, so just length & 0x3f
   1.342 +void OffsetMap::PrintPosition(const char* str) {
   1.343 +  MapOp op = PREFIX_OP;
   1.344 +  int length = 0;
   1.345 +  if ((0 < next_diff_sub_) && (next_diff_sub_ <= static_cast<int>(diffs_.size()))) {
   1.346 +    op = static_cast<MapOp>(OpPart(diffs_[next_diff_sub_ - 1]));
   1.347 +    length = LenPart(diffs_[next_diff_sub_ - 1]);
   1.348 +  }
   1.349 +  fprintf(stderr, "%s[%d] %c%02d = A[%d..%d) ==> A'[%d..%d)\n",
   1.350 +          str,
   1.351 +          next_diff_sub_, "&=+-"[op], length,
   1.352 +          current_lo_aoffset_, current_hi_aoffset_,
   1.353 +          current_lo_aprimeoffset_, current_hi_aprimeoffset_);
   1.354 +}
   1.355 +
   1.356 +// Move active window one range to the right
   1.357 +// Return true if move was OK
   1.358 +bool OffsetMap::MoveRight() {
   1.359 +  // If at last range or RIGHT, set to RIGHT, return error
   1.360 +  if (next_diff_sub_ >= static_cast<int>(diffs_.size())) {
   1.361 +    SetRight();
   1.362 +    return false;
   1.363 +  }
   1.364 +  // Actually OK to move right
   1.365 +  MapOp op;
   1.366 +  int length;
   1.367 +  bool retval = true;
   1.368 +  // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
   1.369 +  next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length);
   1.370 +
   1.371 +  current_lo_aoffset_ = current_hi_aoffset_;
   1.372 +  current_lo_aprimeoffset_ = current_hi_aprimeoffset_;
   1.373 +  if (op == COPY_OP) {
   1.374 +    current_hi_aoffset_ = current_lo_aoffset_ + length;
   1.375 +    current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
   1.376 +  } else if (op == INSERT_OP) {
   1.377 +    current_hi_aoffset_ = current_lo_aoffset_ + 0;
   1.378 +    current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
   1.379 +  } else if (op == DELETE_OP) {
   1.380 +    current_hi_aoffset_ = current_lo_aoffset_ + length;
   1.381 +    current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0;
   1.382 +  } else {
   1.383 +    SetRight();
   1.384 +    retval = false;
   1.385 +  }
   1.386 +  current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
   1.387 +  return retval;
   1.388 +}
   1.389 +
   1.390 +// Move active window one range to the left
   1.391 +// Return true if move was OK
   1.392 +bool OffsetMap::MoveLeft() {
   1.393 +  // If at first range or LEFT, set to LEFT, return error
   1.394 +  if (next_diff_sub_ <= 0) {
   1.395 +    SetLeft();
   1.396 +    return false;
   1.397 +  }
   1.398 +  // Back up over current active window
   1.399 +  next_diff_sub_ = Backup(next_diff_sub_);
   1.400 +  if (next_diff_sub_ <= 0) {
   1.401 +    SetLeft();
   1.402 +    return false;
   1.403 +  }
   1.404 +  // Actually OK to move left
   1.405 +  MapOp op;
   1.406 +  int length;
   1.407 +  bool retval = true;
   1.408 +  // If mal-formed or in LEFT, this will return with op = PREFIX_OP
   1.409 +  next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length);
   1.410 +
   1.411 +  current_hi_aoffset_ = current_lo_aoffset_;
   1.412 +  current_hi_aprimeoffset_ = current_lo_aprimeoffset_;
   1.413 +  if (op == COPY_OP) {
   1.414 +    current_lo_aoffset_ = current_hi_aoffset_ - length;
   1.415 +    current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
   1.416 +  } else if (op == INSERT_OP) {
   1.417 +    current_lo_aoffset_ = current_hi_aoffset_ - 0;
   1.418 +    current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
   1.419 +  } else if (op == DELETE_OP) {
   1.420 +    current_lo_aoffset_ = current_hi_aoffset_ - length;
   1.421 +    current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0;
   1.422 +  } else {
   1.423 +    SetLeft();
   1.424 +    retval = false;
   1.425 +  }
   1.426 +  current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
   1.427 +  return true;
   1.428 +}
   1.429 +
   1.430 +// Map an offset in A' to the corresponding offset in A
   1.431 +int OffsetMap::MapBack(int aprimeoffset){
   1.432 +  MaybeFlushAll();
   1.433 +  if (aprimeoffset < 0) {return 0;}
   1.434 +  if (max_aprimeoffset_ <= aprimeoffset) {
   1.435 +     return (aprimeoffset - max_aprimeoffset_) + max_aoffset_;
   1.436 +  }
   1.437 +
   1.438 +  // If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_,
   1.439 +  // use current mapping, else move window left/right
   1.440 +  bool ok = true;
   1.441 +  while (ok && (aprimeoffset < current_lo_aprimeoffset_)) {
   1.442 +    ok = MoveLeft();
   1.443 +  }
   1.444 +  while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) {
   1.445 +    ok = MoveRight();
   1.446 +  }
   1.447 +  // So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_
   1.448 +
   1.449 +  int aoffset = aprimeoffset - current_diff_;
   1.450 +  if (aoffset >= current_hi_aoffset_) {
   1.451 +    // A' is in an insert region, all bytes of which backmap to A=hi_aoffset_
   1.452 +    aoffset = current_hi_aoffset_;
   1.453 +  }
   1.454 +  return aoffset;
   1.455 +}
   1.456 +
   1.457 +// Map an offset in A to the corresponding offset in A'
   1.458 +int OffsetMap::MapForward(int aoffset){
   1.459 +  MaybeFlushAll();
   1.460 +  if (aoffset < 0) {return 0;}
   1.461 +  if (max_aoffset_ <= aoffset) {
   1.462 +     return (aoffset - max_aoffset_) + max_aprimeoffset_;
   1.463 +  }
   1.464 +
   1.465 +  // If current_lo_aoffset_ <= aoffset < current_hi_aoffset_,
   1.466 +  // use current mapping, else move window left/right
   1.467 +  bool ok = true;
   1.468 +  while (ok && (aoffset < current_lo_aoffset_)) {
   1.469 +    ok = MoveLeft();
   1.470 +  }
   1.471 +  while (ok && (current_hi_aoffset_ <= aoffset)) {
   1.472 +    ok = MoveRight();
   1.473 +  }
   1.474 +
   1.475 +  int aprimeoffset = aoffset + current_diff_;
   1.476 +  if (aprimeoffset >= current_hi_aprimeoffset_) {
   1.477 +    // A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_
   1.478 +    aprimeoffset = current_hi_aprimeoffset_;
   1.479 +  }
   1.480 +  return aprimeoffset;
   1.481 +}
   1.482 +
   1.483 +
   1.484 +// static
   1.485 +bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) {
   1.486 +  bool ok = true;
   1.487 +  while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
   1.488 +    ok = source->MoveRight();
   1.489 +    if (source->current_lo_aoffset_ != source->current_hi_aoffset_) {
   1.490 +      return false;
   1.491 +    }
   1.492 +    dest->Insert(
   1.493 +        source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_);
   1.494 +  }
   1.495 +  return true;
   1.496 +}
   1.497 +
   1.498 +// static
   1.499 +bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) {
   1.500 +  bool ok = true;
   1.501 +  while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
   1.502 +    ok = source->MoveRight();
   1.503 +    if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) {
   1.504 +      return false;
   1.505 +    }
   1.506 +    dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_);
   1.507 +  }
   1.508 +  return true;
   1.509 +}
   1.510 +
   1.511 +// static
   1.512 +void OffsetMap::ComposeOffsetMap(
   1.513 +    OffsetMap* g, OffsetMap* f, OffsetMap* h) {
   1.514 +  h->Clear();
   1.515 +  f->Reset();
   1.516 +  g->Reset();
   1.517 +
   1.518 +  int lo = 0;
   1.519 +  for (;;) {
   1.520 +     // Consume delete operations in f. This moves A without moving
   1.521 +     // A' and A''.
   1.522 +     if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) {
   1.523 +       if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) {
   1.524 +          // fprintf(stderr,
   1.525 +          //         "ComposeOffsetMap ERROR, f is longer than g.<br>\n");
   1.526 +       }
   1.527 +
   1.528 +       // FlushAll(), called by Reset(), MapForward() or MapBack(), has
   1.529 +       // added an extra COPY_OP to f and g, so this function has
   1.530 +       // composed an extra COPY_OP in h from those. To avoid
   1.531 +       // FlushAll() adds one more extra COPY_OP to h later, dispatch
   1.532 +       // Flush() right now.
   1.533 +       h->Flush();
   1.534 +       return;
   1.535 +     }
   1.536 +
   1.537 +     // Consume insert operations in g. This moves A'' without moving A
   1.538 +     // and A'.
   1.539 +     if (lo >= f->current_hi_aprimeoffset_) {
   1.540 +       if (!CopyDeletes(f, h)) {
   1.541 +          // fprintf(stderr,
   1.542 +          //         "ComposeOffsetMap ERROR, g is longer than f.<br>\n");
   1.543 +       }
   1.544 +     }
   1.545 +
   1.546 +     // Compose one operation which moves A' from lo to hi.
   1.547 +     int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_);
   1.548 +     if (f->current_lo_aoffset_ != f->current_hi_aoffset_ &&
   1.549 +         g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
   1.550 +       h->Copy(hi - lo);
   1.551 +     } else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) {
   1.552 +       h->Delete(hi - lo);
   1.553 +     } else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
   1.554 +       h->Insert(hi - lo);
   1.555 +     }
   1.556 +
   1.557 +     lo = hi;
   1.558 +  }
   1.559 +}
   1.560 +
   1.561 +// For testing only -- force a mapping
   1.562 +void OffsetMap::StuffIt(const string& diffs,
   1.563 +                        int max_aoffset, int max_aprimeoffset) {
   1.564 +  Clear();
   1.565 +  diffs_ = diffs;
   1.566 +  max_aoffset_ = max_aoffset;
   1.567 +  max_aprimeoffset_ = max_aprimeoffset;
   1.568 +}
   1.569 +
   1.570 +
   1.571 +}  // namespace CLD2
   1.572 +

mercurial