other-licenses/snappy/src/snappy_unittest.cc

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/other-licenses/snappy/src/snappy_unittest.cc	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1153 @@
     1.4 +// Copyright 2005 and onwards Google Inc.
     1.5 +//
     1.6 +// Redistribution and use in source and binary forms, with or without
     1.7 +// modification, are permitted provided that the following conditions are
     1.8 +// met:
     1.9 +//
    1.10 +//     * Redistributions of source code must retain the above copyright
    1.11 +// notice, this list of conditions and the following disclaimer.
    1.12 +//     * Redistributions in binary form must reproduce the above
    1.13 +// copyright notice, this list of conditions and the following disclaimer
    1.14 +// in the documentation and/or other materials provided with the
    1.15 +// distribution.
    1.16 +//     * Neither the name of Google Inc. nor the names of its
    1.17 +// contributors may be used to endorse or promote products derived from
    1.18 +// this software without specific prior written permission.
    1.19 +//
    1.20 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    1.21 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    1.22 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    1.23 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    1.24 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    1.25 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    1.26 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    1.27 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    1.28 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    1.29 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    1.30 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.31 +
    1.32 +#include <math.h>
    1.33 +#include <stdlib.h>
    1.34 +
    1.35 +
    1.36 +#include <algorithm>
    1.37 +#include <string>
    1.38 +#include <vector>
    1.39 +
    1.40 +#include "snappy.h"
    1.41 +#include "snappy-internal.h"
    1.42 +#include "snappy-test.h"
    1.43 +#include "snappy-sinksource.h"
    1.44 +
    1.45 +DEFINE_int32(start_len, -1,
    1.46 +             "Starting prefix size for testing (-1: just full file contents)");
    1.47 +DEFINE_int32(end_len, -1,
     1.48 +             "Last prefix size for testing (-1: just full file contents)");
    1.49 +DEFINE_int32(bytes, 10485760,
    1.50 +             "How many bytes to compress/uncompress per file for timing");
    1.51 +
    1.52 +DEFINE_bool(zlib, false,
    1.53 +            "Run zlib compression (http://www.zlib.net)");
    1.54 +DEFINE_bool(lzo, false,
    1.55 +            "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
    1.56 +DEFINE_bool(quicklz, false,
    1.57 +            "Run quickLZ compression (http://www.quicklz.com/)");
    1.58 +DEFINE_bool(liblzf, false,
    1.59 +            "Run libLZF compression "
    1.60 +            "(http://www.goof.com/pcg/marc/liblzf.html)");
    1.61 +DEFINE_bool(fastlz, false,
     1.62 +            "Run FastLZ compression (http://www.fastlz.org/)");
    1.63 +DEFINE_bool(snappy, true, "Run snappy compression");
    1.64 +
    1.65 +
    1.66 +DEFINE_bool(write_compressed, false,
    1.67 +            "Write compressed versions of each file to <file>.comp");
    1.68 +DEFINE_bool(write_uncompressed, false,
    1.69 +            "Write uncompressed versions of each file to <file>.uncomp");
    1.70 +
    1.71 +namespace snappy {
    1.72 +
    1.73 +
    1.74 +#ifdef HAVE_FUNC_MMAP
    1.75 +
    1.76 +// To test against code that reads beyond its input, this class copies a
    1.77 +// string to a newly allocated group of pages, the last of which
    1.78 +// is made unreadable via mprotect. Note that we need to allocate the
    1.79 +// memory with mmap(), as POSIX allows mprotect() only on memory allocated
    1.80 +// with mmap(), and some malloc/posix_memalign implementations expect to
    1.81 +// be able to read previously allocated memory while doing heap allocations.
    1.82 +class DataEndingAtUnreadablePage {
    1.83 + public:
    1.84 +  explicit DataEndingAtUnreadablePage(const string& s) {
    1.85 +    const size_t page_size = getpagesize();
    1.86 +    const size_t size = s.size();
    1.87 +    // Round up space for string to a multiple of page_size.
    1.88 +    size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
    1.89 +    alloc_size_ = space_for_string + page_size;
    1.90 +    mem_ = mmap(NULL, alloc_size_,
    1.91 +                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    1.92 +    CHECK_NE(MAP_FAILED, mem_);
    1.93 +    protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
    1.94 +    char* dst = protected_page_ - size;
    1.95 +    memcpy(dst, s.data(), size);
    1.96 +    data_ = dst;
    1.97 +    size_ = size;
    1.98 +    // Make guard page unreadable.
    1.99 +    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
   1.100 +  }
   1.101 +
   1.102 +  ~DataEndingAtUnreadablePage() {
   1.103 +    // Undo the mprotect.
   1.104 +    CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE));
   1.105 +    CHECK_EQ(0, munmap(mem_, alloc_size_));
   1.106 +  }
   1.107 +
   1.108 +  const char* data() const { return data_; }
   1.109 +  size_t size() const { return size_; }
   1.110 +
   1.111 + private:
   1.112 +  size_t alloc_size_;
   1.113 +  void* mem_;
   1.114 +  char* protected_page_;
   1.115 +  const char* data_;
   1.116 +  size_t size_;
   1.117 +};
   1.118 +
   1.119 +#else  // HAVE_FUNC_MMAP
   1.120 +
   1.121 +// Fallback for systems without mmap.
   1.122 +typedef string DataEndingAtUnreadablePage;
   1.123 +
   1.124 +#endif
   1.125 +
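A minimal usage sketch of how the tests below drive this guard-page wrapper
(snappy::Compress/Uncompress are the real API; the helper name and glue here
are illustrative only, mirroring VerifyString() and ReadPastEndOfBuffer):

    static void RoundTripAgainstGuardPage(const string& input) {
      // The copy ends flush against an unreadable page, so any read past
      // data()+size() inside snappy faults instead of silently succeeding.
      DataEndingAtUnreadablePage guarded(input);
      string compressed, roundtripped;
      snappy::Compress(guarded.data(), guarded.size(), &compressed);
      CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
                               &roundtripped));
      CHECK_EQ(roundtripped, input);
    }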
   1.126 +enum CompressorType {
   1.127 +  ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY
   1.128 +};
   1.129 +
   1.130 +const char* names[] = {
   1.131 +  "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY"
   1.132 +};
   1.133 +
   1.134 +static size_t MinimumRequiredOutputSpace(size_t input_size,
   1.135 +                                         CompressorType comp) {
   1.136 +  switch (comp) {
   1.137 +#ifdef ZLIB_VERSION
   1.138 +    case ZLIB:
   1.139 +      return ZLib::MinCompressbufSize(input_size);
   1.140 +#endif  // ZLIB_VERSION
   1.141 +
   1.142 +#ifdef LZO_VERSION
   1.143 +    case LZO:
   1.144 +      return input_size + input_size/64 + 16 + 3;
   1.145 +#endif  // LZO_VERSION
   1.146 +
   1.147 +#ifdef LZF_VERSION
   1.148 +    case LIBLZF:
   1.149 +      return input_size;
   1.150 +#endif  // LZF_VERSION
   1.151 +
   1.152 +#ifdef QLZ_VERSION_MAJOR
   1.153 +    case QUICKLZ:
   1.154 +      return input_size + 36000;  // 36000 is used for scratch.
   1.155 +#endif  // QLZ_VERSION_MAJOR
   1.156 +
   1.157 +#ifdef FASTLZ_VERSION
   1.158 +    case FASTLZ:
   1.159 +      return max(static_cast<int>(ceil(input_size * 1.05)), 66);
   1.160 +#endif  // FASTLZ_VERSION
   1.161 +
   1.162 +    case SNAPPY:
   1.163 +      return snappy::MaxCompressedLength(input_size);
   1.164 +
   1.165 +    default:
   1.166 +      LOG(FATAL) << "Unknown compression type number " << comp;
   1.167 +  }
   1.168 +}
   1.169 +
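For a sense of scale in the SNAPPY branch above: snappy documents
MaxCompressedLength(n) as 32 + n + n/6 (as of the version imported here), so
the 1 MB blocks used by Measure() below preallocate
32 + 1048576 + 174762 = 1223370 bytes, roughly a 17% worst-case blowup.
A small sketch of that arithmetic (illustrative only):

    static void PrintSnappyWorstCase() {
      const size_t block = 1 << 20;                             // 1 MB block
      const size_t bound = snappy::MaxCompressedLength(block);  // 32 + n + n/6
      printf("worst-case output for %zu bytes: %zu bytes\n", block, bound);
    }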
   1.170 +// Returns true if we successfully compressed, false otherwise.
   1.171 +//
   1.172 +// If compressed_is_preallocated is set, do not resize the compressed buffer.
   1.173 +// This is typically what you want for a benchmark, in order to not spend
   1.174 +// time in the memory allocator. If you do set this flag, however,
    1.175 +// "compressed" must be preinitialized to at least MinimumRequiredOutputSpace()
   1.176 +// number of bytes, and may contain junk bytes at the end after return.
   1.177 +static bool Compress(const char* input, size_t input_size, CompressorType comp,
   1.178 +                     string* compressed, bool compressed_is_preallocated) {
   1.179 +  if (!compressed_is_preallocated) {
   1.180 +    compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
   1.181 +  }
   1.182 +
   1.183 +  switch (comp) {
   1.184 +#ifdef ZLIB_VERSION
   1.185 +    case ZLIB: {
   1.186 +      ZLib zlib;
   1.187 +      uLongf destlen = compressed->size();
   1.188 +      int ret = zlib.Compress(
   1.189 +          reinterpret_cast<Bytef*>(string_as_array(compressed)),
   1.190 +          &destlen,
   1.191 +          reinterpret_cast<const Bytef*>(input),
   1.192 +          input_size);
   1.193 +      CHECK_EQ(Z_OK, ret);
   1.194 +      if (!compressed_is_preallocated) {
   1.195 +        compressed->resize(destlen);
   1.196 +      }
   1.197 +      return true;
   1.198 +    }
   1.199 +#endif  // ZLIB_VERSION
   1.200 +
   1.201 +#ifdef LZO_VERSION
   1.202 +    case LZO: {
   1.203 +      unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
   1.204 +      lzo_uint destlen;
   1.205 +      int ret = lzo1x_1_15_compress(
   1.206 +          reinterpret_cast<const uint8*>(input),
   1.207 +          input_size,
   1.208 +          reinterpret_cast<uint8*>(string_as_array(compressed)),
   1.209 +          &destlen,
   1.210 +          mem);
   1.211 +      CHECK_EQ(LZO_E_OK, ret);
   1.212 +      delete[] mem;
   1.213 +      if (!compressed_is_preallocated) {
   1.214 +        compressed->resize(destlen);
   1.215 +      }
   1.216 +      break;
   1.217 +    }
   1.218 +#endif  // LZO_VERSION
   1.219 +
   1.220 +#ifdef LZF_VERSION
   1.221 +    case LIBLZF: {
   1.222 +      int destlen = lzf_compress(input,
   1.223 +                                 input_size,
   1.224 +                                 string_as_array(compressed),
   1.225 +                                 input_size);
   1.226 +      if (destlen == 0) {
   1.227 +        // lzf *can* cause lots of blowup when compressing, so they
    1.228 +        // recommend limiting outsize to insize, and just not compressing
   1.229 +        // if it's bigger.  Ideally, we'd just swap input and output.
   1.230 +        compressed->assign(input, input_size);
   1.231 +        destlen = input_size;
   1.232 +      }
   1.233 +      if (!compressed_is_preallocated) {
   1.234 +        compressed->resize(destlen);
   1.235 +      }
   1.236 +      break;
   1.237 +    }
   1.238 +#endif  // LZF_VERSION
   1.239 +
   1.240 +#ifdef QLZ_VERSION_MAJOR
   1.241 +    case QUICKLZ: {
   1.242 +      qlz_state_compress *state_compress = new qlz_state_compress;
   1.243 +      int destlen = qlz_compress(input,
   1.244 +                                 string_as_array(compressed),
   1.245 +                                 input_size,
   1.246 +                                 state_compress);
   1.247 +      delete state_compress;
   1.248 +      CHECK_NE(0, destlen);
   1.249 +      if (!compressed_is_preallocated) {
   1.250 +        compressed->resize(destlen);
   1.251 +      }
   1.252 +      break;
   1.253 +    }
   1.254 +#endif  // QLZ_VERSION_MAJOR
   1.255 +
   1.256 +#ifdef FASTLZ_VERSION
   1.257 +    case FASTLZ: {
   1.258 +      // Use level 1 compression since we mostly care about speed.
   1.259 +      int destlen = fastlz_compress_level(
   1.260 +          1,
   1.261 +          input,
   1.262 +          input_size,
   1.263 +          string_as_array(compressed));
   1.264 +      if (!compressed_is_preallocated) {
   1.265 +        compressed->resize(destlen);
   1.266 +      }
   1.267 +      CHECK_NE(destlen, 0);
   1.268 +      break;
   1.269 +    }
   1.270 +#endif  // FASTLZ_VERSION
   1.271 +
   1.272 +    case SNAPPY: {
   1.273 +      size_t destlen;
   1.274 +      snappy::RawCompress(input, input_size,
   1.275 +                          string_as_array(compressed),
   1.276 +                          &destlen);
   1.277 +      CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
   1.278 +      if (!compressed_is_preallocated) {
   1.279 +        compressed->resize(destlen);
   1.280 +      }
   1.281 +      break;
   1.282 +    }
   1.283 +
   1.284 +
   1.285 +    default: {
   1.286 +      return false;     // the asked-for library wasn't compiled in
   1.287 +    }
   1.288 +  }
   1.289 +  return true;
   1.290 +}
   1.291 +
   1.292 +static bool Uncompress(const string& compressed, CompressorType comp,
   1.293 +                       int size, string* output) {
   1.294 +  switch (comp) {
   1.295 +#ifdef ZLIB_VERSION
   1.296 +    case ZLIB: {
   1.297 +      output->resize(size);
   1.298 +      ZLib zlib;
   1.299 +      uLongf destlen = output->size();
   1.300 +      int ret = zlib.Uncompress(
   1.301 +          reinterpret_cast<Bytef*>(string_as_array(output)),
   1.302 +          &destlen,
   1.303 +          reinterpret_cast<const Bytef*>(compressed.data()),
   1.304 +          compressed.size());
   1.305 +      CHECK_EQ(Z_OK, ret);
   1.306 +      CHECK_EQ(static_cast<uLongf>(size), destlen);
   1.307 +      break;
   1.308 +    }
   1.309 +#endif  // ZLIB_VERSION
   1.310 +
   1.311 +#ifdef LZO_VERSION
   1.312 +    case LZO: {
   1.313 +      output->resize(size);
   1.314 +      lzo_uint destlen;
   1.315 +      int ret = lzo1x_decompress(
   1.316 +          reinterpret_cast<const uint8*>(compressed.data()),
   1.317 +          compressed.size(),
   1.318 +          reinterpret_cast<uint8*>(string_as_array(output)),
   1.319 +          &destlen,
   1.320 +          NULL);
   1.321 +      CHECK_EQ(LZO_E_OK, ret);
   1.322 +      CHECK_EQ(static_cast<lzo_uint>(size), destlen);
   1.323 +      break;
   1.324 +    }
   1.325 +#endif  // LZO_VERSION
   1.326 +
   1.327 +#ifdef LZF_VERSION
   1.328 +    case LIBLZF: {
   1.329 +      output->resize(size);
   1.330 +      int destlen = lzf_decompress(compressed.data(),
   1.331 +                                   compressed.size(),
   1.332 +                                   string_as_array(output),
   1.333 +                                   output->size());
   1.334 +      if (destlen == 0) {
   1.335 +        // This error probably means we had decided not to compress,
   1.336 +        // and thus have stored input in output directly.
   1.337 +        output->assign(compressed.data(), compressed.size());
   1.338 +        destlen = compressed.size();
   1.339 +      }
   1.340 +      CHECK_EQ(destlen, size);
   1.341 +      break;
   1.342 +    }
   1.343 +#endif  // LZF_VERSION
   1.344 +
   1.345 +#ifdef QLZ_VERSION_MAJOR
   1.346 +    case QUICKLZ: {
   1.347 +      output->resize(size);
   1.348 +      qlz_state_decompress *state_decompress = new qlz_state_decompress;
   1.349 +      int destlen = qlz_decompress(compressed.data(),
   1.350 +                                   string_as_array(output),
   1.351 +                                   state_decompress);
   1.352 +      delete state_decompress;
   1.353 +      CHECK_EQ(destlen, size);
   1.354 +      break;
   1.355 +    }
   1.356 +#endif  // QLZ_VERSION_MAJOR
   1.357 +
   1.358 +#ifdef FASTLZ_VERSION
   1.359 +    case FASTLZ: {
   1.360 +      output->resize(size);
   1.361 +      int destlen = fastlz_decompress(compressed.data(),
   1.362 +                                      compressed.length(),
   1.363 +                                      string_as_array(output),
   1.364 +                                      size);
   1.365 +      CHECK_EQ(destlen, size);
   1.366 +      break;
   1.367 +    }
   1.368 +#endif  // FASTLZ_VERSION
   1.369 +
   1.370 +    case SNAPPY: {
   1.371 +      snappy::RawUncompress(compressed.data(), compressed.size(),
   1.372 +                            string_as_array(output));
   1.373 +      break;
   1.374 +    }
   1.375 +
   1.376 +
   1.377 +    default: {
   1.378 +      return false;     // the asked-for library wasn't compiled in
   1.379 +    }
   1.380 +  }
   1.381 +  return true;
   1.382 +}
   1.383 +
   1.384 +static void Measure(const char* data,
   1.385 +                    size_t length,
   1.386 +                    CompressorType comp,
   1.387 +                    int repeats,
   1.388 +                    int block_size) {
    1.389 +  // Run tests a few times and pick the median running times
   1.390 +  static const int kRuns = 5;
   1.391 +  double ctime[kRuns];
   1.392 +  double utime[kRuns];
   1.393 +  int compressed_size = 0;
   1.394 +
   1.395 +  {
   1.396 +    // Chop the input into blocks
   1.397 +    int num_blocks = (length + block_size - 1) / block_size;
   1.398 +    vector<const char*> input(num_blocks);
   1.399 +    vector<size_t> input_length(num_blocks);
   1.400 +    vector<string> compressed(num_blocks);
   1.401 +    vector<string> output(num_blocks);
   1.402 +    for (int b = 0; b < num_blocks; b++) {
   1.403 +      int input_start = b * block_size;
   1.404 +      int input_limit = min<int>((b+1)*block_size, length);
   1.405 +      input[b] = data+input_start;
   1.406 +      input_length[b] = input_limit-input_start;
   1.407 +
   1.408 +      // Pre-grow the output buffer so we don't measure string append time.
   1.409 +      compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
   1.410 +    }
   1.411 +
   1.412 +    // First, try one trial compression to make sure the code is compiled in
   1.413 +    if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
   1.414 +      LOG(WARNING) << "Skipping " << names[comp] << ": "
   1.415 +                   << "library not compiled in";
   1.416 +      return;
   1.417 +    }
   1.418 +
   1.419 +    for (int run = 0; run < kRuns; run++) {
   1.420 +      CycleTimer ctimer, utimer;
   1.421 +
   1.422 +      for (int b = 0; b < num_blocks; b++) {
   1.423 +        // Pre-grow the output buffer so we don't measure string append time.
   1.424 +        compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
   1.425 +      }
   1.426 +
   1.427 +      ctimer.Start();
   1.428 +      for (int b = 0; b < num_blocks; b++)
   1.429 +        for (int i = 0; i < repeats; i++)
   1.430 +          Compress(input[b], input_length[b], comp, &compressed[b], true);
   1.431 +      ctimer.Stop();
   1.432 +
   1.433 +      // Compress once more, with resizing, so we don't leave junk
   1.434 +      // at the end that will confuse the decompressor.
   1.435 +      for (int b = 0; b < num_blocks; b++) {
   1.436 +        Compress(input[b], input_length[b], comp, &compressed[b], false);
   1.437 +      }
   1.438 +
   1.439 +      for (int b = 0; b < num_blocks; b++) {
   1.440 +        output[b].resize(input_length[b]);
   1.441 +      }
   1.442 +
   1.443 +      utimer.Start();
   1.444 +      for (int i = 0; i < repeats; i++)
   1.445 +        for (int b = 0; b < num_blocks; b++)
   1.446 +          Uncompress(compressed[b], comp, input_length[b], &output[b]);
   1.447 +      utimer.Stop();
   1.448 +
   1.449 +      ctime[run] = ctimer.Get();
   1.450 +      utime[run] = utimer.Get();
   1.451 +    }
   1.452 +
   1.453 +    compressed_size = 0;
   1.454 +    for (int i = 0; i < compressed.size(); i++) {
   1.455 +      compressed_size += compressed[i].size();
   1.456 +    }
   1.457 +  }
   1.458 +
   1.459 +  sort(ctime, ctime + kRuns);
   1.460 +  sort(utime, utime + kRuns);
   1.461 +  const int med = kRuns/2;
   1.462 +
   1.463 +  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
   1.464 +  float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
   1.465 +  string x = names[comp];
   1.466 +  x += ":";
   1.467 +  string urate = (uncomp_rate >= 0)
   1.468 +                 ? StringPrintf("%.1f", uncomp_rate)
   1.469 +                 : string("?");
   1.470 +  printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%%  "
   1.471 +         "comp %5.1f MB/s  uncomp %5s MB/s\n",
   1.472 +         x.c_str(),
   1.473 +         block_size/(1<<20),
   1.474 +         static_cast<int>(length), static_cast<uint32>(compressed_size),
   1.475 +         (compressed_size * 100.0) / max<int>(1, length),
   1.476 +         comp_rate,
   1.477 +         urate.c_str());
   1.478 +}
   1.479 +
   1.480 +
   1.481 +static int VerifyString(const string& input) {
   1.482 +  string compressed;
   1.483 +  DataEndingAtUnreadablePage i(input);
   1.484 +  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
   1.485 +  CHECK_EQ(written, compressed.size());
   1.486 +  CHECK_LE(compressed.size(),
   1.487 +           snappy::MaxCompressedLength(input.size()));
   1.488 +  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
   1.489 +
   1.490 +  string uncompressed;
   1.491 +  DataEndingAtUnreadablePage c(compressed);
   1.492 +  CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
   1.493 +  CHECK_EQ(uncompressed, input);
   1.494 +  return uncompressed.size();
   1.495 +}
   1.496 +
   1.497 +
   1.498 +// Test that data compressed by a compressor that does not
   1.499 +// obey block sizes is uncompressed properly.
   1.500 +static void VerifyNonBlockedCompression(const string& input) {
   1.501 +  if (input.length() > snappy::kBlockSize) {
   1.502 +    // We cannot test larger blocks than the maximum block size, obviously.
   1.503 +    return;
   1.504 +  }
   1.505 +
   1.506 +  string prefix;
   1.507 +  Varint::Append32(&prefix, input.size());
   1.508 +
    1.509 +  // Set up the compression table
   1.510 +  snappy::internal::WorkingMemory wmem;
   1.511 +  int table_size;
   1.512 +  uint16* table = wmem.GetHashTable(input.size(), &table_size);
   1.513 +
   1.514 +  // Compress entire input in one shot
   1.515 +  string compressed;
   1.516 +  compressed += prefix;
   1.517 +  compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
   1.518 +  char* dest = string_as_array(&compressed) + prefix.size();
   1.519 +  char* end = snappy::internal::CompressFragment(input.data(), input.size(),
   1.520 +                                                dest, table, table_size);
   1.521 +  compressed.resize(end - compressed.data());
   1.522 +
   1.523 +  // Uncompress into string
   1.524 +  string uncomp_str;
   1.525 +  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
   1.526 +  CHECK_EQ(uncomp_str, input);
   1.527 +
   1.528 +}
   1.529 +
   1.530 +// Expand the input so that it is at least K times as big as block size
   1.531 +static string Expand(const string& input) {
   1.532 +  static const int K = 3;
   1.533 +  string data = input;
   1.534 +  while (data.size() < K * snappy::kBlockSize) {
   1.535 +    data += input;
   1.536 +  }
   1.537 +  return data;
   1.538 +}
   1.539 +
   1.540 +static int Verify(const string& input) {
   1.541 +  VLOG(1) << "Verifying input of size " << input.size();
   1.542 +
   1.543 +  // Compress using string based routines
   1.544 +  const int result = VerifyString(input);
   1.545 +
   1.546 +
   1.547 +  VerifyNonBlockedCompression(input);
   1.548 +  if (!input.empty()) {
   1.549 +    VerifyNonBlockedCompression(Expand(input));
   1.550 +  }
   1.551 +
   1.552 +
   1.553 +  return result;
   1.554 +}
   1.555 +
   1.556 +// This test checks to ensure that snappy doesn't coredump if it gets
   1.557 +// corrupted data.
   1.558 +
   1.559 +static bool IsValidCompressedBuffer(const string& c) {
   1.560 +  return snappy::IsValidCompressedBuffer(c.data(), c.size());
   1.561 +}
   1.562 +static bool Uncompress(const string& c, string* u) {
   1.563 +  return snappy::Uncompress(c.data(), c.size(), u);
   1.564 +}
   1.565 +
   1.566 +TYPED_TEST(CorruptedTest, VerifyCorrupted) {
   1.567 +  string source = "making sure we don't crash with corrupted input";
   1.568 +  VLOG(1) << source;
   1.569 +  string dest;
   1.570 +  TypeParam uncmp;
   1.571 +  snappy::Compress(source.data(), source.size(), &dest);
   1.572 +
   1.573 +  // Mess around with the data. It's hard to simulate all possible
   1.574 +  // corruptions; this is just one example ...
   1.575 +  CHECK_GT(dest.size(), 3);
   1.576 +  dest[1]--;
   1.577 +  dest[3]++;
   1.578 +  // this really ought to fail.
   1.579 +  CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
   1.580 +  CHECK(!Uncompress(TypeParam(dest), &uncmp));
   1.581 +
    1.582 +  // This tests for a security bug: a buffer that decompresses to 100k, but
    1.583 +  // whose snappy header lies so that only 0 bytes of memory get reserved.
   1.584 +  source.resize(100000);
   1.585 +  for (int i = 0; i < source.length(); ++i) {
   1.586 +    source[i] = 'A';
   1.587 +  }
   1.588 +  snappy::Compress(source.data(), source.size(), &dest);
   1.589 +  dest[0] = dest[1] = dest[2] = dest[3] = 0;
   1.590 +  CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
   1.591 +  CHECK(!Uncompress(TypeParam(dest), &uncmp));
   1.592 +
   1.593 +  if (sizeof(void *) == 4) {
    1.594 +    // Another security check: verify that a crazy big length can't DoS us
    1.595 +    // with an over-allocation.
   1.596 +    // Currently this is done only for 32-bit builds.  On 64-bit builds,
   1.597 +    // where 3GBytes might be an acceptable allocation size, Uncompress()
   1.598 +    // attempts to decompress, and sometimes causes the test to run out of
   1.599 +    // memory.
   1.600 +    dest[0] = dest[1] = dest[2] = dest[3] = 0xff;
   1.601 +    // This decodes to a really large size, i.e., 3221225471 bytes
   1.602 +    dest[4] = 'k';
   1.603 +    CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
   1.604 +    CHECK(!Uncompress(TypeParam(dest), &uncmp));
   1.605 +    dest[0] = dest[1] = dest[2] = 0xff;
   1.606 +    dest[3] = 0x7f;
   1.607 +    CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
   1.608 +    CHECK(!Uncompress(TypeParam(dest), &uncmp));
   1.609 +  } else {
   1.610 +    LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
   1.611 +  }
   1.612 +
   1.613 +  // try reading stuff in from a bad file.
   1.614 +  for (int i = 1; i <= 3; ++i) {
   1.615 +    string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str());
   1.616 +    string uncmp;
   1.617 +    // check that we don't return a crazy length
   1.618 +    size_t ulen;
   1.619 +    CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
   1.620 +          || (ulen < (1<<20)));
   1.621 +    uint32 ulen2;
   1.622 +    snappy::ByteArraySource source(data.data(), data.size());
   1.623 +    CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
   1.624 +          (ulen2 < (1<<20)));
   1.625 +    CHECK(!IsValidCompressedBuffer(TypeParam(data)));
   1.626 +    CHECK(!Uncompress(TypeParam(data), &uncmp));
   1.627 +  }
   1.628 +}
   1.629 +
   1.630 +// Helper routines to construct arbitrary compressed strings.
   1.631 +// These mirror the compression code in snappy.cc, but are copied
    1.632 +// here so that we can bypass some limitations in how snappy.cc
   1.633 +// invokes these routines.
   1.634 +static void AppendLiteral(string* dst, const string& literal) {
   1.635 +  if (literal.empty()) return;
   1.636 +  int n = literal.size() - 1;
   1.637 +  if (n < 60) {
   1.638 +    // Fit length in tag byte
   1.639 +    dst->push_back(0 | (n << 2));
   1.640 +  } else {
   1.641 +    // Encode in upcoming bytes
   1.642 +    char number[4];
   1.643 +    int count = 0;
   1.644 +    while (n > 0) {
   1.645 +      number[count++] = n & 0xff;
   1.646 +      n >>= 8;
   1.647 +    }
   1.648 +    dst->push_back(0 | ((59+count) << 2));
   1.649 +    *dst += string(number, count);
   1.650 +  }
   1.651 +  *dst += literal;
   1.652 +}
   1.653 +
   1.654 +static void AppendCopy(string* dst, int offset, int length) {
   1.655 +  while (length > 0) {
   1.656 +    // Figure out how much to copy in one shot
   1.657 +    int to_copy;
   1.658 +    if (length >= 68) {
   1.659 +      to_copy = 64;
   1.660 +    } else if (length > 64) {
   1.661 +      to_copy = 60;
   1.662 +    } else {
   1.663 +      to_copy = length;
   1.664 +    }
   1.665 +    length -= to_copy;
   1.666 +
   1.667 +    if ((to_copy < 12) && (offset < 2048)) {
   1.668 +      assert(to_copy-4 < 8);            // Must fit in 3 bits
   1.669 +      dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5));
   1.670 +      dst->push_back(offset & 0xff);
   1.671 +    } else if (offset < 65536) {
   1.672 +      dst->push_back(2 | ((to_copy-1) << 2));
   1.673 +      dst->push_back(offset & 0xff);
   1.674 +      dst->push_back(offset >> 8);
   1.675 +    } else {
   1.676 +      dst->push_back(3 | ((to_copy-1) << 2));
   1.677 +      dst->push_back(offset & 0xff);
   1.678 +      dst->push_back((offset >> 8) & 0xff);
   1.679 +      dst->push_back((offset >> 16) & 0xff);
   1.680 +      dst->push_back((offset >> 24) & 0xff);
   1.681 +    }
   1.682 +  }
   1.683 +}
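A quick worked example of the byte patterns these two helpers emit, derived
from the code above (illustrative only): AppendLiteral of "abc" writes the
tag byte 0x08 (tag type 00, length-1 = 2) followed by the literal bytes, and
AppendCopy with offset 4 and length 5 writes 0x05 0x04 (tag type 01 copy,
length 5, 11-bit offset 4):

    static void TagEncodingExample() {
      string s;
      AppendLiteral(&s, "abc");
      AppendCopy(&s, 4, 5);
      CHECK_EQ(s, string("\x08" "abc" "\x05\x04", 6));
    }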
   1.684 +
   1.685 +TEST(Snappy, SimpleTests) {
   1.686 +  Verify("");
   1.687 +  Verify("a");
   1.688 +  Verify("ab");
   1.689 +  Verify("abc");
   1.690 +
   1.691 +  Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
   1.692 +  Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
   1.693 +  Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
   1.694 +  Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
   1.695 +  Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
   1.696 +}
   1.697 +
   1.698 +// Verify max blowup (lots of four-byte copies)
   1.699 +TEST(Snappy, MaxBlowup) {
   1.700 +  string input;
   1.701 +  for (int i = 0; i < 20000; i++) {
   1.702 +    ACMRandom rnd(i);
   1.703 +    uint32 bytes = static_cast<uint32>(rnd.Next());
   1.704 +    input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
   1.705 +  }
   1.706 +  for (int i = 19999; i >= 0; i--) {
   1.707 +    ACMRandom rnd(i);
   1.708 +    uint32 bytes = static_cast<uint32>(rnd.Next());
   1.709 +    input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
   1.710 +  }
   1.711 +  Verify(input);
   1.712 +}
   1.713 +
   1.714 +TEST(Snappy, RandomData) {
   1.715 +  ACMRandom rnd(FLAGS_test_random_seed);
   1.716 +
   1.717 +  const int num_ops = 20000;
   1.718 +  for (int i = 0; i < num_ops; i++) {
   1.719 +    if ((i % 1000) == 0) {
   1.720 +      VLOG(0) << "Random op " << i << " of " << num_ops;
   1.721 +    }
   1.722 +
   1.723 +    string x;
   1.724 +    int len = rnd.Uniform(4096);
   1.725 +    if (i < 100) {
   1.726 +      len = 65536 + rnd.Uniform(65536);
   1.727 +    }
   1.728 +    while (x.size() < len) {
   1.729 +      int run_len = 1;
   1.730 +      if (rnd.OneIn(10)) {
   1.731 +        run_len = rnd.Skewed(8);
   1.732 +      }
   1.733 +      char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
   1.734 +      while (run_len-- > 0 && x.size() < len) {
   1.735 +        x += c;
   1.736 +      }
   1.737 +    }
   1.738 +
   1.739 +    Verify(x);
   1.740 +  }
   1.741 +}
   1.742 +
   1.743 +TEST(Snappy, FourByteOffset) {
   1.744 +  // The new compressor cannot generate four-byte offsets since
   1.745 +  // it chops up the input into 32KB pieces.  So we hand-emit the
   1.746 +  // copy manually.
   1.747 +
   1.748 +  // The two fragments that make up the input string.
   1.749 +  string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
   1.750 +  string fragment2 = "some other string";
   1.751 +
   1.752 +  // How many times each fragment is emitted.
   1.753 +  const int n1 = 2;
   1.754 +  const int n2 = 100000 / fragment2.size();
   1.755 +  const int length = n1 * fragment1.size() + n2 * fragment2.size();
   1.756 +
   1.757 +  string compressed;
   1.758 +  Varint::Append32(&compressed, length);
   1.759 +
   1.760 +  AppendLiteral(&compressed, fragment1);
   1.761 +  string src = fragment1;
   1.762 +  for (int i = 0; i < n2; i++) {
   1.763 +    AppendLiteral(&compressed, fragment2);
   1.764 +    src += fragment2;
   1.765 +  }
   1.766 +  AppendCopy(&compressed, src.size(), fragment1.size());
   1.767 +  src += fragment1;
   1.768 +  CHECK_EQ(length, src.size());
   1.769 +
   1.770 +  string uncompressed;
   1.771 +  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
   1.772 +  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed));
   1.773 +  CHECK_EQ(uncompressed, src);
   1.774 +}
   1.775 +
   1.776 +
   1.777 +static bool CheckUncompressedLength(const string& compressed,
   1.778 +                                    size_t* ulength) {
   1.779 +  const bool result1 = snappy::GetUncompressedLength(compressed.data(),
   1.780 +                                                     compressed.size(),
   1.781 +                                                     ulength);
   1.782 +
   1.783 +  snappy::ByteArraySource source(compressed.data(), compressed.size());
   1.784 +  uint32 length;
   1.785 +  const bool result2 = snappy::GetUncompressedLength(&source, &length);
   1.786 +  CHECK_EQ(result1, result2);
   1.787 +  return result1;
   1.788 +}
   1.789 +
   1.790 +TEST(SnappyCorruption, TruncatedVarint) {
   1.791 +  string compressed, uncompressed;
   1.792 +  size_t ulength;
   1.793 +  compressed.push_back('\xf0');
   1.794 +  CHECK(!CheckUncompressedLength(compressed, &ulength));
   1.795 +  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
   1.796 +  CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
   1.797 +                            &uncompressed));
   1.798 +}
   1.799 +
   1.800 +TEST(SnappyCorruption, UnterminatedVarint) {
   1.801 +  string compressed, uncompressed;
   1.802 +  size_t ulength;
   1.803 +  compressed.push_back(128);
   1.804 +  compressed.push_back(128);
   1.805 +  compressed.push_back(128);
   1.806 +  compressed.push_back(128);
   1.807 +  compressed.push_back(128);
   1.808 +  compressed.push_back(10);
   1.809 +  CHECK(!CheckUncompressedLength(compressed, &ulength));
   1.810 +  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
   1.811 +  CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
   1.812 +                            &uncompressed));
   1.813 +}
   1.814 +
   1.815 +TEST(Snappy, ReadPastEndOfBuffer) {
   1.816 +  // Check that we do not read past end of input
   1.817 +
   1.818 +  // Make a compressed string that ends with a single-byte literal
   1.819 +  string compressed;
   1.820 +  Varint::Append32(&compressed, 1);
   1.821 +  AppendLiteral(&compressed, "x");
   1.822 +
   1.823 +  string uncompressed;
   1.824 +  DataEndingAtUnreadablePage c(compressed);
   1.825 +  CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
   1.826 +  CHECK_EQ(uncompressed, string("x"));
   1.827 +}
   1.828 +
   1.829 +// Check for an infinite loop caused by a copy with offset==0
   1.830 +TEST(Snappy, ZeroOffsetCopy) {
   1.831 +  const char* compressed = "\x40\x12\x00\x00";
   1.832 +  //  \x40              Length (must be > kMaxIncrementCopyOverflow)
   1.833 +  //  \x12\x00\x00      Copy with offset==0, length==5
   1.834 +  char uncompressed[100];
   1.835 +  EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
   1.836 +}
   1.837 +
   1.838 +TEST(Snappy, ZeroOffsetCopyValidation) {
   1.839 +  const char* compressed = "\x05\x12\x00\x00";
   1.840 +  //  \x05              Length
   1.841 +  //  \x12\x00\x00      Copy with offset==0, length==5
   1.842 +  EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
   1.843 +}
   1.844 +
   1.845 +
   1.846 +namespace {
   1.847 +
   1.848 +int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
   1.849 +  return snappy::internal::FindMatchLength(s1, s2, s2 + length);
   1.850 +}
   1.851 +
   1.852 +}  // namespace
   1.853 +
   1.854 +TEST(Snappy, FindMatchLength) {
   1.855 +  // Exercise all different code paths through the function.
   1.856 +  // 64-bit version:
   1.857 +
   1.858 +  // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
   1.859 +  EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
   1.860 +  EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));
   1.861 +
   1.862 +  // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
   1.863 +  EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));
   1.864 +
   1.865 +  // Same, but edge cases.
   1.866 +  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
   1.867 +  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));
   1.868 +
   1.869 +  // Find non-match at once in first loop.
   1.870 +  EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
   1.871 +  EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
   1.872 +  EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
   1.873 +  EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));
   1.874 +
   1.875 +  // Find non-match in first loop after one block.
   1.876 +  EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
   1.877 +                                   "abcdefgh?1234567xxxxxxxx", 24));
   1.878 +  EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
   1.879 +                                   "abcdefgh0?234567xxxxxxxx", 24));
   1.880 +  EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
   1.881 +                                    "abcdefgh01237654xxxxxxxx", 24));
   1.882 +  EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
   1.883 +                                    "abcdefgh0123456?xxxxxxxx", 24));
   1.884 +
   1.885 +  // 32-bit version:
   1.886 +
   1.887 +  // Short matches.
   1.888 +  EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
   1.889 +  EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
   1.890 +  EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
   1.891 +  EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
   1.892 +  EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
   1.893 +  EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
   1.894 +  EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
   1.895 +  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
   1.896 +  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
   1.897 +  EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));
   1.898 +
   1.899 +  // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
   1.900 +  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
   1.901 +  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
   1.902 +  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));
   1.903 +
   1.904 +  // Same, but edge cases.
   1.905 +  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
   1.906 +  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));
   1.907 +
   1.908 +  // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
   1.909 +  EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));
   1.910 +
   1.911 +  // Find non-match at once in first loop.
   1.912 +  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
   1.913 +                                   "xxxxxx?123xxxxxxxx", 18));
   1.914 +  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
   1.915 +                                   "xxxxxx0?23xxxxxxxx", 18));
   1.916 +  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
   1.917 +                                   "xxxxxx0132xxxxxxxx", 18));
   1.918 +  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
   1.919 +                                   "xxxxxx012?xxxxxxxx", 18));
   1.920 +
   1.921 +  // Same, but edge cases.
   1.922 +  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
   1.923 +  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
   1.924 +  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
   1.925 +  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));
   1.926 +
   1.927 +  // Find non-match in first loop after one block.
   1.928 +  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
   1.929 +                                    "xxxxxxabcd?123xx", 16));
   1.930 +  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
   1.931 +                                    "xxxxxxabcd0?23xx", 16));
   1.932 +  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
   1.933 +                                    "xxxxxxabcd0132xx", 16));
   1.934 +  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
   1.935 +                                    "xxxxxxabcd012?xx", 16));
   1.936 +
   1.937 +  // Same, but edge cases.
   1.938 +  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
   1.939 +  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
   1.940 +  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
   1.941 +  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
   1.942 +}
   1.943 +
   1.944 +TEST(Snappy, FindMatchLengthRandom) {
   1.945 +  const int kNumTrials = 10000;
   1.946 +  const int kTypicalLength = 10;
   1.947 +  ACMRandom rnd(FLAGS_test_random_seed);
   1.948 +
   1.949 +  for (int i = 0; i < kNumTrials; i++) {
   1.950 +    string s, t;
   1.951 +    char a = rnd.Rand8();
   1.952 +    char b = rnd.Rand8();
   1.953 +    while (!rnd.OneIn(kTypicalLength)) {
   1.954 +      s.push_back(rnd.OneIn(2) ? a : b);
   1.955 +      t.push_back(rnd.OneIn(2) ? a : b);
   1.956 +    }
   1.957 +    DataEndingAtUnreadablePage u(s);
   1.958 +    DataEndingAtUnreadablePage v(t);
   1.959 +    int matched = snappy::internal::FindMatchLength(
   1.960 +        u.data(), v.data(), v.data() + t.size());
   1.961 +    if (matched == t.size()) {
   1.962 +      EXPECT_EQ(s, t);
   1.963 +    } else {
   1.964 +      EXPECT_NE(s[matched], t[matched]);
   1.965 +      for (int j = 0; j < matched; j++) {
   1.966 +        EXPECT_EQ(s[j], t[j]);
   1.967 +      }
   1.968 +    }
   1.969 +  }
   1.970 +}
   1.971 +
   1.972 +
   1.973 +static void CompressFile(const char* fname) {
   1.974 +  string fullinput;
   1.975 +  File::ReadFileToStringOrDie(fname, &fullinput);
   1.976 +
   1.977 +  string compressed;
   1.978 +  Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
   1.979 +
   1.980 +  File::WriteStringToFileOrDie(compressed,
   1.981 +                               string(fname).append(".comp").c_str());
   1.982 +}
   1.983 +
   1.984 +static void UncompressFile(const char* fname) {
   1.985 +  string fullinput;
   1.986 +  File::ReadFileToStringOrDie(fname, &fullinput);
   1.987 +
   1.988 +  size_t uncompLength;
   1.989 +  CHECK(CheckUncompressedLength(fullinput, &uncompLength));
   1.990 +
   1.991 +  string uncompressed;
   1.992 +  uncompressed.resize(uncompLength);
   1.993 +  CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
   1.994 +
   1.995 +  File::WriteStringToFileOrDie(uncompressed,
   1.996 +                               string(fname).append(".uncomp").c_str());
   1.997 +}
   1.998 +
   1.999 +static void MeasureFile(const char* fname) {
  1.1000 +  string fullinput;
  1.1001 +  File::ReadFileToStringOrDie(fname, &fullinput);
  1.1002 +  printf("%-40s :\n", fname);
  1.1003 +
  1.1004 +  int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
  1.1005 +  int end_len = fullinput.size();
  1.1006 +  if (FLAGS_end_len >= 0) {
  1.1007 +    end_len = min<int>(fullinput.size(), FLAGS_end_len);
  1.1008 +  }
  1.1009 +  for (int len = start_len; len <= end_len; len++) {
  1.1010 +    const char* const input = fullinput.data();
  1.1011 +    int repeats = (FLAGS_bytes + len) / (len + 1);
  1.1012 +    if (FLAGS_zlib)     Measure(input, len, ZLIB, repeats, 1024<<10);
  1.1013 +    if (FLAGS_lzo)      Measure(input, len, LZO, repeats, 1024<<10);
  1.1014 +    if (FLAGS_liblzf)   Measure(input, len, LIBLZF, repeats, 1024<<10);
  1.1015 +    if (FLAGS_quicklz)  Measure(input, len, QUICKLZ, repeats, 1024<<10);
  1.1016 +    if (FLAGS_fastlz)   Measure(input, len, FASTLZ, repeats, 1024<<10);
  1.1017 +    if (FLAGS_snappy)    Measure(input, len, SNAPPY, repeats, 4096<<10);
  1.1018 +
  1.1019 +    // For block-size based measurements
  1.1020 +    if (0 && FLAGS_snappy) {
  1.1021 +      Measure(input, len, SNAPPY, repeats, 8<<10);
  1.1022 +      Measure(input, len, SNAPPY, repeats, 16<<10);
  1.1023 +      Measure(input, len, SNAPPY, repeats, 32<<10);
  1.1024 +      Measure(input, len, SNAPPY, repeats, 64<<10);
  1.1025 +      Measure(input, len, SNAPPY, repeats, 256<<10);
  1.1026 +      Measure(input, len, SNAPPY, repeats, 1024<<10);
  1.1027 +    }
  1.1028 +  }
  1.1029 +}
  1.1030 +
  1.1031 +static struct {
  1.1032 +  const char* label;
  1.1033 +  const char* filename;
  1.1034 +} files[] = {
  1.1035 +  { "html", "html" },
  1.1036 +  { "urls", "urls.10K" },
  1.1037 +  { "jpg", "house.jpg" },
  1.1038 +  { "pdf", "mapreduce-osdi-1.pdf" },
  1.1039 +  { "html4", "html_x_4" },
  1.1040 +  { "cp", "cp.html" },
  1.1041 +  { "c", "fields.c" },
  1.1042 +  { "lsp", "grammar.lsp" },
  1.1043 +  { "xls", "kennedy.xls" },
  1.1044 +  { "txt1", "alice29.txt" },
  1.1045 +  { "txt2", "asyoulik.txt" },
  1.1046 +  { "txt3", "lcet10.txt" },
  1.1047 +  { "txt4", "plrabn12.txt" },
  1.1048 +  { "bin", "ptt5" },
  1.1049 +  { "sum", "sum" },
  1.1050 +  { "man", "xargs.1" },
  1.1051 +  { "pb", "geo.protodata" },
  1.1052 +  { "gaviota", "kppkn.gtb" },
  1.1053 +};
  1.1054 +
  1.1055 +static void BM_UFlat(int iters, int arg) {
  1.1056 +  StopBenchmarkTiming();
  1.1057 +
  1.1058 +  // Pick file to process based on "arg"
  1.1059 +  CHECK_GE(arg, 0);
  1.1060 +  CHECK_LT(arg, ARRAYSIZE(files));
  1.1061 +  string contents = ReadTestDataFile(files[arg].filename);
  1.1062 +
  1.1063 +  string zcontents;
  1.1064 +  snappy::Compress(contents.data(), contents.size(), &zcontents);
  1.1065 +  char* dst = new char[contents.size()];
  1.1066 +
  1.1067 +  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
  1.1068 +                             static_cast<int64>(contents.size()));
  1.1069 +  SetBenchmarkLabel(files[arg].label);
  1.1070 +  StartBenchmarkTiming();
  1.1071 +  while (iters-- > 0) {
  1.1072 +    CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
  1.1073 +  }
  1.1074 +  StopBenchmarkTiming();
  1.1075 +
  1.1076 +  delete[] dst;
  1.1077 +}
  1.1078 +BENCHMARK(BM_UFlat)->DenseRange(0, 17);
  1.1079 +
  1.1080 +static void BM_UValidate(int iters, int arg) {
  1.1081 +  StopBenchmarkTiming();
  1.1082 +
  1.1083 +  // Pick file to process based on "arg"
  1.1084 +  CHECK_GE(arg, 0);
  1.1085 +  CHECK_LT(arg, ARRAYSIZE(files));
  1.1086 +  string contents = ReadTestDataFile(files[arg].filename);
  1.1087 +
  1.1088 +  string zcontents;
  1.1089 +  snappy::Compress(contents.data(), contents.size(), &zcontents);
  1.1090 +
  1.1091 +  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
  1.1092 +                             static_cast<int64>(contents.size()));
  1.1093 +  SetBenchmarkLabel(files[arg].label);
  1.1094 +  StartBenchmarkTiming();
  1.1095 +  while (iters-- > 0) {
  1.1096 +    CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
  1.1097 +  }
  1.1098 +  StopBenchmarkTiming();
  1.1099 +}
  1.1100 +BENCHMARK(BM_UValidate)->DenseRange(0, 4);
  1.1101 +
  1.1102 +
  1.1103 +static void BM_ZFlat(int iters, int arg) {
  1.1104 +  StopBenchmarkTiming();
  1.1105 +
  1.1106 +  // Pick file to process based on "arg"
  1.1107 +  CHECK_GE(arg, 0);
  1.1108 +  CHECK_LT(arg, ARRAYSIZE(files));
  1.1109 +  string contents = ReadTestDataFile(files[arg].filename);
  1.1110 +
  1.1111 +  char* dst = new char[snappy::MaxCompressedLength(contents.size())];
  1.1112 +
  1.1113 +  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
  1.1114 +                             static_cast<int64>(contents.size()));
  1.1115 +  StartBenchmarkTiming();
  1.1116 +
  1.1117 +  size_t zsize = 0;
  1.1118 +  while (iters-- > 0) {
  1.1119 +    snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
  1.1120 +  }
  1.1121 +  StopBenchmarkTiming();
  1.1122 +  const double compression_ratio =
  1.1123 +      static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
  1.1124 +  SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
  1.1125 +                                 files[arg].label, 100.0 * compression_ratio));
  1.1126 +  VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
  1.1127 +                          files[arg].label, contents.size(), zsize);
  1.1128 +  delete[] dst;
  1.1129 +}
  1.1130 +BENCHMARK(BM_ZFlat)->DenseRange(0, 17);
  1.1131 +
  1.1132 +
  1.1133 +}  // namespace snappy
  1.1134 +
  1.1135 +
  1.1136 +int main(int argc, char** argv) {
  1.1137 +  InitGoogle(argv[0], &argc, &argv, true);
  1.1138 +  File::Init();
  1.1139 +  RunSpecifiedBenchmarks();
  1.1140 +
  1.1141 +
  1.1142 +  if (argc >= 2) {
  1.1143 +    for (int arg = 1; arg < argc; arg++) {
  1.1144 +      if (FLAGS_write_compressed) {
  1.1145 +        CompressFile(argv[arg]);
  1.1146 +      } else if (FLAGS_write_uncompressed) {
  1.1147 +        UncompressFile(argv[arg]);
  1.1148 +      } else {
  1.1149 +        MeasureFile(argv[arg]);
  1.1150 +      }
  1.1151 +    }
  1.1152 +    return 0;
  1.1153 +  }
  1.1154 +
  1.1155 +  return RUN_ALL_TESTS();
  1.1156 +}
