1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/other-licenses/snappy/src/snappy_unittest.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1153 @@ 1.4 +// Copyright 2005 and onwards Google Inc. 1.5 +// 1.6 +// Redistribution and use in source and binary forms, with or without 1.7 +// modification, are permitted provided that the following conditions are 1.8 +// met: 1.9 +// 1.10 +// * Redistributions of source code must retain the above copyright 1.11 +// notice, this list of conditions and the following disclaimer. 1.12 +// * Redistributions in binary form must reproduce the above 1.13 +// copyright notice, this list of conditions and the following disclaimer 1.14 +// in the documentation and/or other materials provided with the 1.15 +// distribution. 1.16 +// * Neither the name of Google Inc. nor the names of its 1.17 +// contributors may be used to endorse or promote products derived from 1.18 +// this software without specific prior written permission. 1.19 +// 1.20 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.21 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.22 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.23 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.24 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.25 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.26 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.27 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.28 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.29 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.30 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
1.31 + 1.32 +#include <math.h> 1.33 +#include <stdlib.h> 1.34 + 1.35 + 1.36 +#include <algorithm> 1.37 +#include <string> 1.38 +#include <vector> 1.39 + 1.40 +#include "snappy.h" 1.41 +#include "snappy-internal.h" 1.42 +#include "snappy-test.h" 1.43 +#include "snappy-sinksource.h" 1.44 + 1.45 +DEFINE_int32(start_len, -1, 1.46 + "Starting prefix size for testing (-1: just full file contents)"); 1.47 +DEFINE_int32(end_len, -1, 1.48 + "Starting prefix size for testing (-1: just full file contents)"); 1.49 +DEFINE_int32(bytes, 10485760, 1.50 + "How many bytes to compress/uncompress per file for timing"); 1.51 + 1.52 +DEFINE_bool(zlib, false, 1.53 + "Run zlib compression (http://www.zlib.net)"); 1.54 +DEFINE_bool(lzo, false, 1.55 + "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)"); 1.56 +DEFINE_bool(quicklz, false, 1.57 + "Run quickLZ compression (http://www.quicklz.com/)"); 1.58 +DEFINE_bool(liblzf, false, 1.59 + "Run libLZF compression " 1.60 + "(http://www.goof.com/pcg/marc/liblzf.html)"); 1.61 +DEFINE_bool(fastlz, false, 1.62 + "Run FastLZ compression (http://www.fastlz.org/"); 1.63 +DEFINE_bool(snappy, true, "Run snappy compression"); 1.64 + 1.65 + 1.66 +DEFINE_bool(write_compressed, false, 1.67 + "Write compressed versions of each file to <file>.comp"); 1.68 +DEFINE_bool(write_uncompressed, false, 1.69 + "Write uncompressed versions of each file to <file>.uncomp"); 1.70 + 1.71 +namespace snappy { 1.72 + 1.73 + 1.74 +#ifdef HAVE_FUNC_MMAP 1.75 + 1.76 +// To test against code that reads beyond its input, this class copies a 1.77 +// string to a newly allocated group of pages, the last of which 1.78 +// is made unreadable via mprotect. Note that we need to allocate the 1.79 +// memory with mmap(), as POSIX allows mprotect() only on memory allocated 1.80 +// with mmap(), and some malloc/posix_memalign implementations expect to 1.81 +// be able to read previously allocated memory while doing heap allocations. 
class DataEndingAtUnreadablePage {
 public:
  // Copies "s" so that its last byte sits immediately before an
  // inaccessible guard page; any read past data()+size() faults.
  explicit DataEndingAtUnreadablePage(const string& s) {
    const size_t page_size = getpagesize();
    const size_t size = s.size();
    // Round up space for string to a multiple of page_size.
    size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
    // One extra page beyond the rounded-up string space becomes the guard.
    alloc_size_ = space_for_string + page_size;
    mem_ = mmap(NULL, alloc_size_,
                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    CHECK_NE(MAP_FAILED, mem_);
    protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
    // Place the copy flush against the guard page so it ends exactly at
    // the protected boundary.
    char* dst = protected_page_ - size;
    memcpy(dst, s.data(), size);
    data_ = dst;
    size_ = size;
    // Make guard page unreadable.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
  }

  ~DataEndingAtUnreadablePage() {
    // Undo the mprotect.
    CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE));
    CHECK_EQ(0, munmap(mem_, alloc_size_));
  }

  const char* data() const { return data_; }
  size_t size() const { return size_; }

 private:
  size_t alloc_size_;       // total bytes passed to mmap/munmap
  void* mem_;               // base of the mapping
  char* protected_page_;    // first byte of the PROT_NONE guard page
  const char* data_;        // start of the copied string
  size_t size_;             // length of the copied string
};

#else  // HAVE_FUNC_MMAP

// Fallback for systems without mmap.
1.122 +typedef string DataEndingAtUnreadablePage; 1.123 + 1.124 +#endif 1.125 + 1.126 +enum CompressorType { 1.127 + ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY 1.128 +}; 1.129 + 1.130 +const char* names[] = { 1.131 + "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY" 1.132 +}; 1.133 + 1.134 +static size_t MinimumRequiredOutputSpace(size_t input_size, 1.135 + CompressorType comp) { 1.136 + switch (comp) { 1.137 +#ifdef ZLIB_VERSION 1.138 + case ZLIB: 1.139 + return ZLib::MinCompressbufSize(input_size); 1.140 +#endif // ZLIB_VERSION 1.141 + 1.142 +#ifdef LZO_VERSION 1.143 + case LZO: 1.144 + return input_size + input_size/64 + 16 + 3; 1.145 +#endif // LZO_VERSION 1.146 + 1.147 +#ifdef LZF_VERSION 1.148 + case LIBLZF: 1.149 + return input_size; 1.150 +#endif // LZF_VERSION 1.151 + 1.152 +#ifdef QLZ_VERSION_MAJOR 1.153 + case QUICKLZ: 1.154 + return input_size + 36000; // 36000 is used for scratch. 1.155 +#endif // QLZ_VERSION_MAJOR 1.156 + 1.157 +#ifdef FASTLZ_VERSION 1.158 + case FASTLZ: 1.159 + return max(static_cast<int>(ceil(input_size * 1.05)), 66); 1.160 +#endif // FASTLZ_VERSION 1.161 + 1.162 + case SNAPPY: 1.163 + return snappy::MaxCompressedLength(input_size); 1.164 + 1.165 + default: 1.166 + LOG(FATAL) << "Unknown compression type number " << comp; 1.167 + } 1.168 +} 1.169 + 1.170 +// Returns true if we successfully compressed, false otherwise. 1.171 +// 1.172 +// If compressed_is_preallocated is set, do not resize the compressed buffer. 1.173 +// This is typically what you want for a benchmark, in order to not spend 1.174 +// time in the memory allocator. If you do set this flag, however, 1.175 +// "compressed" must be preinitialized to at least MinCompressbufSize(comp) 1.176 +// number of bytes, and may contain junk bytes at the end after return. 
// Compresses input[0..input_size) with the chosen compressor into
// *compressed. Returns false only when the compressor was not compiled in.
static bool Compress(const char* input, size_t input_size, CompressorType comp,
                     string* compressed, bool compressed_is_preallocated) {
  if (!compressed_is_preallocated) {
    compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
  }

  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB: {
      ZLib zlib;
      uLongf destlen = compressed->size();
      int ret = zlib.Compress(
          reinterpret_cast<Bytef*>(string_as_array(compressed)),
          &destlen,
          reinterpret_cast<const Bytef*>(input),
          input_size);
      CHECK_EQ(Z_OK, ret);
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      return true;
    }
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO: {
      // LZO needs caller-provided scratch memory for its hash table.
      unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
      lzo_uint destlen;
      int ret = lzo1x_1_15_compress(
          reinterpret_cast<const uint8*>(input),
          input_size,
          reinterpret_cast<uint8*>(string_as_array(compressed)),
          &destlen,
          mem);
      CHECK_EQ(LZO_E_OK, ret);
      delete[] mem;
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }
#endif  // LZO_VERSION

#ifdef LZF_VERSION
    case LIBLZF: {
      int destlen = lzf_compress(input,
                                 input_size,
                                 string_as_array(compressed),
                                 input_size);
      if (destlen == 0) {
        // lzf *can* cause lots of blowup when compressing, so they
        // recommend to limit outsize to insize, and just not compress
        // if it's bigger. Ideally, we'd just swap input and output.
        compressed->assign(input, input_size);
        destlen = input_size;
      }
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }
#endif  // LZF_VERSION

#ifdef QLZ_VERSION_MAJOR
    case QUICKLZ: {
      qlz_state_compress *state_compress = new qlz_state_compress;
      int destlen = qlz_compress(input,
                                 string_as_array(compressed),
                                 input_size,
                                 state_compress);
      delete state_compress;
      CHECK_NE(0, destlen);
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }
#endif  // QLZ_VERSION_MAJOR

#ifdef FASTLZ_VERSION
    case FASTLZ: {
      // Use level 1 compression since we mostly care about speed.
      int destlen = fastlz_compress_level(
          1,
          input,
          input_size,
          string_as_array(compressed));
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      CHECK_NE(destlen, 0);
      break;
    }
#endif  // FASTLZ_VERSION

    case SNAPPY: {
      size_t destlen;
      snappy::RawCompress(input, input_size,
                          string_as_array(compressed),
                          &destlen);
      CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
      if (!compressed_is_preallocated) {
        compressed->resize(destlen);
      }
      break;
    }


    default: {
      return false;  // the asked-for library wasn't compiled in
    }
  }
  return true;
}

// Decompresses "compressed" into *output, which is resized to "size"
// (the known uncompressed length). Returns false only when the
// compressor was not compiled in; all decode errors are CHECK failures.
static bool Uncompress(const string& compressed, CompressorType comp,
                       int size, string* output) {
  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB: {
      output->resize(size);
      ZLib zlib;
      uLongf destlen = output->size();
      int ret = zlib.Uncompress(
          reinterpret_cast<Bytef*>(string_as_array(output)),
          &destlen,
          reinterpret_cast<const Bytef*>(compressed.data()),
          compressed.size());
      CHECK_EQ(Z_OK, ret);
      CHECK_EQ(static_cast<uLongf>(size), destlen);
      break;
    }
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO: {
      output->resize(size);
      lzo_uint destlen;
      int ret = lzo1x_decompress(
          reinterpret_cast<const uint8*>(compressed.data()),
          compressed.size(),
          reinterpret_cast<uint8*>(string_as_array(output)),
          &destlen,
          NULL);
      CHECK_EQ(LZO_E_OK, ret);
      CHECK_EQ(static_cast<lzo_uint>(size), destlen);
      break;
    }
#endif  // LZO_VERSION

#ifdef LZF_VERSION
    case LIBLZF: {
      output->resize(size);
      int destlen = lzf_decompress(compressed.data(),
                                   compressed.size(),
                                   string_as_array(output),
                                   output->size());
      if (destlen == 0) {
        // This error probably means we had decided not to compress,
        // and thus have stored input in output directly.
        output->assign(compressed.data(), compressed.size());
        destlen = compressed.size();
      }
      CHECK_EQ(destlen, size);
      break;
    }
#endif  // LZF_VERSION

#ifdef QLZ_VERSION_MAJOR
    case QUICKLZ: {
      output->resize(size);
      qlz_state_decompress *state_decompress = new qlz_state_decompress;
      int destlen = qlz_decompress(compressed.data(),
                                   string_as_array(output),
                                   state_decompress);
      delete state_decompress;
      CHECK_EQ(destlen, size);
      break;
    }
#endif  // QLZ_VERSION_MAJOR

#ifdef FASTLZ_VERSION
    case FASTLZ: {
      output->resize(size);
      int destlen = fastlz_decompress(compressed.data(),
                                      compressed.length(),
                                      string_as_array(output),
                                      size);
      CHECK_EQ(destlen, size);
      break;
    }
#endif  // FASTLZ_VERSION

    case SNAPPY: {
      snappy::RawUncompress(compressed.data(), compressed.size(),
                            string_as_array(output));
      break;
    }


    default: {
      return false;  // the asked-for library wasn't compiled in
    }
  }
  return true;
}

// Benchmarks one compressor over "data": chops the input into blocks of
// block_size, compresses/uncompresses each block "repeats" times per run,
// and prints the median throughput over kRuns runs.
static void Measure(const char* data,
                    size_t length,
                    CompressorType comp,
                    int repeats,
                    int block_size) {
  // Run tests a few time and pick median running times
  static const int kRuns = 5;
  double ctime[kRuns];
  double utime[kRuns];
  int compressed_size = 0;

  {
    // Chop the input into blocks
    int num_blocks = (length + block_size - 1) / block_size;
    vector<const char*> input(num_blocks);
    vector<size_t> input_length(num_blocks);
    vector<string> compressed(num_blocks);
    vector<string> output(num_blocks);
    for (int b = 0; b < num_blocks; b++) {
      int input_start = b * block_size;
      int input_limit = min<int>((b+1)*block_size, length);
      input[b] = data+input_start;
      input_length[b] = input_limit-input_start;

      // Pre-grow the output buffer so we don't measure string append time.
      compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
    }

    // First, try one trial compression to make sure the code is compiled in
    if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
      LOG(WARNING) << "Skipping " << names[comp] << ": "
                   << "library not compiled in";
      return;
    }

    for (int run = 0; run < kRuns; run++) {
      CycleTimer ctimer, utimer;

      for (int b = 0; b < num_blocks; b++) {
        // Pre-grow the output buffer so we don't measure string append time.
        compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp));
      }

      // Timed compression pass: preallocated buffers, no resizing.
      ctimer.Start();
      for (int b = 0; b < num_blocks; b++)
        for (int i = 0; i < repeats; i++)
          Compress(input[b], input_length[b], comp, &compressed[b], true);
      ctimer.Stop();

      // Compress once more, with resizing, so we don't leave junk
      // at the end that will confuse the decompressor.
      for (int b = 0; b < num_blocks; b++) {
        Compress(input[b], input_length[b], comp, &compressed[b], false);
      }

      for (int b = 0; b < num_blocks; b++) {
        output[b].resize(input_length[b]);
      }

      // Timed decompression pass.
      utimer.Start();
      for (int i = 0; i < repeats; i++)
        for (int b = 0; b < num_blocks; b++)
          Uncompress(compressed[b], comp, input_length[b], &output[b]);
      utimer.Stop();

      ctime[run] = ctimer.Get();
      utime[run] = utimer.Get();
    }

    compressed_size = 0;
    for (int i = 0; i < compressed.size(); i++) {
      compressed_size += compressed[i].size();
    }
  }

  // Report the median run (middle element after sorting).
  sort(ctime, ctime + kRuns);
  sort(utime, utime + kRuns);
  const int med = kRuns/2;

  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
  float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
  string x = names[comp];
  x += ":";
  string urate = (uncomp_rate >= 0)
                 ? StringPrintf("%.1f", uncomp_rate)
                 : string("?");
  printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
         "comp %5.1f MB/s uncomp %5s MB/s\n",
         x.c_str(),
         block_size/(1<<20),
         static_cast<int>(length), static_cast<uint32>(compressed_size),
         (compressed_size * 100.0) / max<int>(1, length),
         comp_rate,
         urate.c_str());
}


// Round-trips "input" through the string-based snappy API, with both the
// compressor input and decompressor input ending at an unreadable page to
// catch reads past the end. Returns the uncompressed size.
static int VerifyString(const string& input) {
  string compressed;
  DataEndingAtUnreadablePage i(input);
  const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
  CHECK_EQ(written, compressed.size());
  CHECK_LE(compressed.size(),
           snappy::MaxCompressedLength(input.size()));
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));

  string uncompressed;
  DataEndingAtUnreadablePage c(compressed);
  CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
  CHECK_EQ(uncompressed, input);
  return uncompressed.size();
}


// Test that data compressed by a compressor that does not
// obey block sizes is uncompressed properly.
static void VerifyNonBlockedCompression(const string& input) {
  if (input.length() > snappy::kBlockSize) {
    // We cannot test larger blocks than the maximum block size, obviously.
    return;
  }

  // Varint-encoded uncompressed length, as the framing expects.
  string prefix;
  Varint::Append32(&prefix, input.size());

  // Setup compression table
  snappy::internal::WorkingMemory wmem;
  int table_size;
  uint16* table = wmem.GetHashTable(input.size(), &table_size);

  // Compress entire input in one shot
  string compressed;
  compressed += prefix;
  compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
  char* dest = string_as_array(&compressed) + prefix.size();
  char* end = snappy::internal::CompressFragment(input.data(), input.size(),
                                                dest, table, table_size);
  compressed.resize(end - compressed.data());

  // Uncompress into string
  string uncomp_str;
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
  CHECK_EQ(uncomp_str, input);

}

// Expand the input so that it is at least K times as big as block size
static string Expand(const string& input) {
  static const int K = 3;
  string data = input;
  while (data.size() < K * snappy::kBlockSize) {
    data += input;
  }
  return data;
}

// Full verification entry point: string round-trip plus non-blocked
// compression (on the input and an expanded copy spanning multiple blocks).
static int Verify(const string& input) {
  VLOG(1) << "Verifying input of size " << input.size();

  // Compress using string based routines
  const int result = VerifyString(input);


  VerifyNonBlockedCompression(input);
  if (!input.empty()) {
    VerifyNonBlockedCompression(Expand(input));
  }


  return result;
}

// This test checks to ensure that snappy doesn't coredump if it gets
// corrupted data.

// Thin wrappers so the typed test below reads naturally.
static bool IsValidCompressedBuffer(const string& c) {
  return snappy::IsValidCompressedBuffer(c.data(), c.size());
}
static bool Uncompress(const string& c, string* u) {
  return snappy::Uncompress(c.data(), c.size(), u);
}

// TypeParam is either string or DataEndingAtUnreadablePage (presumably set
// up by a TYPED_TEST_CASE elsewhere in this file -- not visible here).
TYPED_TEST(CorruptedTest, VerifyCorrupted) {
  string source = "making sure we don't crash with corrupted input";
  VLOG(1) << source;
  string dest;
  TypeParam uncmp;
  snappy::Compress(source.data(), source.size(), &dest);

  // Mess around with the data. It's hard to simulate all possible
  // corruptions; this is just one example ...
  CHECK_GT(dest.size(), 3);
  dest[1]--;
  dest[3]++;
  // this really ought to fail.
  CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
  CHECK(!Uncompress(TypeParam(dest), &uncmp));

  // This is testing for a security bug - a buffer that decompresses to 100k
  // but we lie in the snappy header and only reserve 0 bytes of memory :)
  source.resize(100000);
  for (int i = 0; i < source.length(); ++i) {
    source[i] = 'A';
  }
  snappy::Compress(source.data(), source.size(), &dest);
  // Zero out the varint length header so the claimed size is 0.
  dest[0] = dest[1] = dest[2] = dest[3] = 0;
  CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
  CHECK(!Uncompress(TypeParam(dest), &uncmp));

  if (sizeof(void *) == 4) {
    // Another security check; check a crazy big length can't DoS us with an
    // over-allocation.
    // Currently this is done only for 32-bit builds. On 64-bit builds,
    // where 3GBytes might be an acceptable allocation size, Uncompress()
    // attempts to decompress, and sometimes causes the test to run out of
    // memory.
    dest[0] = dest[1] = dest[2] = dest[3] = 0xff;
    // This decodes to a really large size, i.e., 3221225471 bytes
    dest[4] = 'k';
    CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
    CHECK(!Uncompress(TypeParam(dest), &uncmp));
    dest[0] = dest[1] = dest[2] = 0xff;
    dest[3] = 0x7f;
    CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
    CHECK(!Uncompress(TypeParam(dest), &uncmp));
  } else {
    LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
  }

  // try reading stuff in from a bad file.
  for (int i = 1; i <= 3; ++i) {
    string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str());
    string uncmp;
    // check that we don't return a crazy length
    size_t ulen;
    CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
          || (ulen < (1<<20)));
    uint32 ulen2;
    snappy::ByteArraySource source(data.data(), data.size());
    CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
          (ulen2 < (1<<20)));
    CHECK(!IsValidCompressedBuffer(TypeParam(data)));
    CHECK(!Uncompress(TypeParam(data), &uncmp));
  }
}

// Helper routines to construct arbitrary compressed strings.
// These mirror the compression code in snappy.cc, but are copied
// here so that we can bypass some limitations in the how snappy.cc
// invokes these routines.
// Appends "literal" to *dst as a snappy literal element: a tag byte with
// the low two bits 00, the (length-1) either packed into the tag (< 60)
// or stored in 1-4 following bytes.
static void AppendLiteral(string* dst, const string& literal) {
  if (literal.empty()) return;
  int n = literal.size() - 1;
  if (n < 60) {
    // Fit length in tag byte
    dst->push_back(0 | (n << 2));
  } else {
    // Encode in upcoming bytes
    char number[4];
    int count = 0;
    while (n > 0) {
      number[count++] = n & 0xff;
      n >>= 8;
    }
    dst->push_back(0 | ((59+count) << 2));
    *dst += string(number, count);
  }
  *dst += literal;
}

// Appends copy element(s) covering "length" bytes at "offset", choosing
// among the 1/2/4-byte-offset encodings. NOTE(review): the short form
// assumes to_copy >= 4 (to_copy-4 must be non-negative); callers appear
// to pass length >= 4 -- confirm before reusing with shorter lengths.
static void AppendCopy(string* dst, int offset, int length) {
  while (length > 0) {
    // Figure out how much to copy in one shot
    int to_copy;
    if (length >= 68) {
      to_copy = 64;
    } else if (length > 64) {
      // Leave at least 4 bytes for the next iteration's copy.
      to_copy = 60;
    } else {
      to_copy = length;
    }
    length -= to_copy;

    if ((to_copy < 12) && (offset < 2048)) {
      assert(to_copy-4 < 8);            // Must fit in 3 bits
      dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5));
      dst->push_back(offset & 0xff);
    } else if (offset < 65536) {
      dst->push_back(2 | ((to_copy-1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back(offset >> 8);
    } else {
      dst->push_back(3 | ((to_copy-1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back((offset >> 8) & 0xff);
      dst->push_back((offset >> 16) & 0xff);
      dst->push_back((offset >> 24) & 0xff);
    }
  }
}

TEST(Snappy, SimpleTests) {
  Verify("");
  Verify("a");
  Verify("ab");
  Verify("abc");

  // Runs sized to exercise the different literal-length encodings.
  Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc");
  Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc");
  Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc");
  Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
  Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc");
}

// Verify max blowup (lots of four-byte copies)
TEST(Snappy, MaxBlowup) {
  string input;
  // Mirrored random 4-byte words: the second half repeats the first half's
  // words in reverse order, producing many long-distance matches.
  for (int i = 0; i < 20000; i++) {
    ACMRandom rnd(i);
    uint32 bytes = static_cast<uint32>(rnd.Next());
    input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
  }
  for (int i = 19999; i >= 0; i--) {
    ACMRandom rnd(i);
    uint32 bytes = static_cast<uint32>(rnd.Next());
    input.append(reinterpret_cast<char*>(&bytes), sizeof(bytes));
  }
  Verify(input);
}

TEST(Snappy, RandomData) {
  ACMRandom rnd(FLAGS_test_random_seed);

  const int num_ops = 20000;
  for (int i = 0; i < num_ops; i++) {
    if ((i % 1000) == 0) {
      VLOG(0) << "Random op " << i << " of " << num_ops;
    }

    string x;
    int len = rnd.Uniform(4096);
    // The first 100 iterations use large inputs with a wide byte range;
    // the rest use short, skewed inputs.
    if (i < 100) {
      len = 65536 + rnd.Uniform(65536);
    }
    while (x.size() < len) {
      int run_len = 1;
      if (rnd.OneIn(10)) {
        run_len = rnd.Skewed(8);
      }
      char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3);
      while (run_len-- > 0 && x.size() < len) {
        x += c;
      }
    }

    Verify(x);
  }
}

TEST(Snappy, FourByteOffset) {
  // The new compressor cannot generate four-byte offsets since
  // it chops up the input into 32KB pieces. So we hand-emit the
  // copy manually.

  // The two fragments that make up the input string.
  string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
  string fragment2 = "some other string";

  // How many times each fragment is emitted.
  const int n1 = 2;
  const int n2 = 100000 / fragment2.size();
  const int length = n1 * fragment1.size() + n2 * fragment2.size();

  string compressed;
  Varint::Append32(&compressed, length);

  AppendLiteral(&compressed, fragment1);
  string src = fragment1;
  for (int i = 0; i < n2; i++) {
    AppendLiteral(&compressed, fragment2);
    src += fragment2;
  }
  // Copy fragment1 from the very start -- offset src.size() > 65535 forces
  // the four-byte-offset encoding in AppendCopy.
  AppendCopy(&compressed, src.size(), fragment1.size());
  src += fragment1;
  CHECK_EQ(length, src.size());

  string uncompressed;
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed));
  CHECK_EQ(uncompressed, src);
}


// Reads the uncompressed-length header via both the flat-array and the
// Source-based APIs and checks they agree on success/failure.
static bool CheckUncompressedLength(const string& compressed,
                                    size_t* ulength) {
  const bool result1 = snappy::GetUncompressedLength(compressed.data(),
                                                     compressed.size(),
                                                     ulength);

  snappy::ByteArraySource source(compressed.data(), compressed.size());
  uint32 length;
  const bool result2 = snappy::GetUncompressedLength(&source, &length);
  CHECK_EQ(result1, result2);
  return result1;
}

TEST(SnappyCorruption, TruncatedVarint) {
  string compressed, uncompressed;
  size_t ulength;
  // A lone continuation byte: the length varint promises more bytes that
  // never arrive.
  compressed.push_back('\xf0');
  CHECK(!CheckUncompressedLength(compressed, &ulength));
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
                            &uncompressed));
}

TEST(SnappyCorruption, UnterminatedVarint) {
  string compressed, uncompressed;
  size_t ulength;
  // Six bytes of varint: more than the five a 32-bit length may occupy.
  compressed.push_back(128);
  compressed.push_back(128);
  compressed.push_back(128);
  compressed.push_back(128);
  compressed.push_back(128);
  compressed.push_back(10);
  CHECK(!CheckUncompressedLength(compressed, &ulength));
  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
                            &uncompressed));
}

TEST(Snappy, ReadPastEndOfBuffer) {
  // Check that we do not read past end of input

  // Make a compressed string that ends with a single-byte literal
  string compressed;
  Varint::Append32(&compressed, 1);
  AppendLiteral(&compressed, "x");

  string uncompressed;
  DataEndingAtUnreadablePage c(compressed);
  CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
  CHECK_EQ(uncompressed, string("x"));
}

// Check for an infinite loop caused by a copy with offset==0
TEST(Snappy, ZeroOffsetCopy) {
  const char* compressed = "\x40\x12\x00\x00";
  //  \x40              Length (must be > kMaxIncrementCopyOverflow)
  //  \x12\x00\x00     Copy with offset==0, length==5
  char uncompressed[100];
  EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
}

TEST(Snappy, ZeroOffsetCopyValidation) {
  const char* compressed = "\x05\x12\x00\x00";
  //  \x05              Length
  //  \x12\x00\x00     Copy with offset==0, length==5
  EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
}


namespace {

// Convenience wrapper: match length of s1 vs s2 with s2's limit at
// s2 + length.
int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
  return snappy::internal::FindMatchLength(s1, s2, s2 + length);
}

}  // namespace

TEST(Snappy, FindMatchLength) {
  // Exercise all different code paths through the function.
  // 64-bit version:

  // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));

  // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));

  // Same, but edge cases.
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));

  // Find non-match at once in first loop.
  EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
  EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
  EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
  EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));

  // Find non-match in first loop after one block.
  EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                   "abcdefgh?1234567xxxxxxxx", 24));
  EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                   "abcdefgh0?234567xxxxxxxx", 24));
  EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                    "abcdefgh01237654xxxxxxxx", 24));
  EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                    "abcdefgh0123456?xxxxxxxx", 24));

  // 32-bit version:

  // Short matches.
  EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
  EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
  EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
  EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
  EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
  EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
  EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
  EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));

  // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));

  // Same, but edge cases.
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));

  // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));

  // Find non-match at once in first loop.
  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx?123xxxxxxxx", 18));
  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx0?23xxxxxxxx", 18));
  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx0132xxxxxxxx", 18));
  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx012?xxxxxxxx", 18));

  // Same, but edge cases.
  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));

  // Find non-match in first loop after one block.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd?123xx", 16));
  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd0?23xx", 16));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd0132xx", 16));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd012?xx", 16));

  // Same, but edge cases.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
}

TEST(Snappy, FindMatchLengthRandom) {
  const int kNumTrials = 10000;
  const int kTypicalLength = 10;
  ACMRandom rnd(FLAGS_test_random_seed);

  for (int i = 0; i < kNumTrials; i++) {
    string s, t;
    // Two random strings over a two-character alphabet, so prefixes match
    // with probability ~1/2 per position.
    char a = rnd.Rand8();
    char b = rnd.Rand8();
    while (!rnd.OneIn(kTypicalLength)) {
      s.push_back(rnd.OneIn(2) ? a : b);
      t.push_back(rnd.OneIn(2) ? a : b);
    }
    // Guard pages behind both inputs catch any read past the limit.
    DataEndingAtUnreadablePage u(s);
    DataEndingAtUnreadablePage v(t);
    int matched = snappy::internal::FindMatchLength(
        u.data(), v.data(), v.data() + t.size());
    if (matched == t.size()) {
      EXPECT_EQ(s, t);
    } else {
      EXPECT_NE(s[matched], t[matched]);
      for (int j = 0; j < matched; j++) {
        EXPECT_EQ(s[j], t[j]);
      }
    }
  }
}


// Compresses the named file with snappy and writes <fname>.comp.
static void CompressFile(const char* fname) {
  string fullinput;
  File::ReadFileToStringOrDie(fname, &fullinput);

  string compressed;
  Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);

  File::WriteStringToFileOrDie(compressed,
                               string(fname).append(".comp").c_str());
}

// Uncompresses the named (snappy-compressed) file and writes <fname>.uncomp.
static void UncompressFile(const char* fname) {
  string fullinput;
  File::ReadFileToStringOrDie(fname, &fullinput);

  size_t uncompLength;
  CHECK(CheckUncompressedLength(fullinput, &uncompLength));

  string uncompressed;
  uncompressed.resize(uncompLength);
  CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));

  File::WriteStringToFileOrDie(uncompressed,
                               string(fname).append(".uncomp").c_str());
}

// Benchmarks all enabled compressors over the named file.
// (Definition continues beyond the end of this chunk.)
static void MeasureFile(const char* fname) {
  string fullinput;
  File::ReadFileToStringOrDie(fname, &fullinput);
  printf("%-40s :\n", fname);

  int start_len = (FLAGS_start_len < 0) ?
fullinput.size() : FLAGS_start_len; 1.1005 + int end_len = fullinput.size(); 1.1006 + if (FLAGS_end_len >= 0) { 1.1007 + end_len = min<int>(fullinput.size(), FLAGS_end_len); 1.1008 + } 1.1009 + for (int len = start_len; len <= end_len; len++) { 1.1010 + const char* const input = fullinput.data(); 1.1011 + int repeats = (FLAGS_bytes + len) / (len + 1); 1.1012 + if (FLAGS_zlib) Measure(input, len, ZLIB, repeats, 1024<<10); 1.1013 + if (FLAGS_lzo) Measure(input, len, LZO, repeats, 1024<<10); 1.1014 + if (FLAGS_liblzf) Measure(input, len, LIBLZF, repeats, 1024<<10); 1.1015 + if (FLAGS_quicklz) Measure(input, len, QUICKLZ, repeats, 1024<<10); 1.1016 + if (FLAGS_fastlz) Measure(input, len, FASTLZ, repeats, 1024<<10); 1.1017 + if (FLAGS_snappy) Measure(input, len, SNAPPY, repeats, 4096<<10); 1.1018 + 1.1019 + // For block-size based measurements 1.1020 + if (0 && FLAGS_snappy) { 1.1021 + Measure(input, len, SNAPPY, repeats, 8<<10); 1.1022 + Measure(input, len, SNAPPY, repeats, 16<<10); 1.1023 + Measure(input, len, SNAPPY, repeats, 32<<10); 1.1024 + Measure(input, len, SNAPPY, repeats, 64<<10); 1.1025 + Measure(input, len, SNAPPY, repeats, 256<<10); 1.1026 + Measure(input, len, SNAPPY, repeats, 1024<<10); 1.1027 + } 1.1028 + } 1.1029 +} 1.1030 + 1.1031 +static struct { 1.1032 + const char* label; 1.1033 + const char* filename; 1.1034 +} files[] = { 1.1035 + { "html", "html" }, 1.1036 + { "urls", "urls.10K" }, 1.1037 + { "jpg", "house.jpg" }, 1.1038 + { "pdf", "mapreduce-osdi-1.pdf" }, 1.1039 + { "html4", "html_x_4" }, 1.1040 + { "cp", "cp.html" }, 1.1041 + { "c", "fields.c" }, 1.1042 + { "lsp", "grammar.lsp" }, 1.1043 + { "xls", "kennedy.xls" }, 1.1044 + { "txt1", "alice29.txt" }, 1.1045 + { "txt2", "asyoulik.txt" }, 1.1046 + { "txt3", "lcet10.txt" }, 1.1047 + { "txt4", "plrabn12.txt" }, 1.1048 + { "bin", "ptt5" }, 1.1049 + { "sum", "sum" }, 1.1050 + { "man", "xargs.1" }, 1.1051 + { "pb", "geo.protodata" }, 1.1052 + { "gaviota", "kppkn.gtb" }, 1.1053 +}; 1.1054 + 1.1055 
+static void BM_UFlat(int iters, int arg) { 1.1056 + StopBenchmarkTiming(); 1.1057 + 1.1058 + // Pick file to process based on "arg" 1.1059 + CHECK_GE(arg, 0); 1.1060 + CHECK_LT(arg, ARRAYSIZE(files)); 1.1061 + string contents = ReadTestDataFile(files[arg].filename); 1.1062 + 1.1063 + string zcontents; 1.1064 + snappy::Compress(contents.data(), contents.size(), &zcontents); 1.1065 + char* dst = new char[contents.size()]; 1.1066 + 1.1067 + SetBenchmarkBytesProcessed(static_cast<int64>(iters) * 1.1068 + static_cast<int64>(contents.size())); 1.1069 + SetBenchmarkLabel(files[arg].label); 1.1070 + StartBenchmarkTiming(); 1.1071 + while (iters-- > 0) { 1.1072 + CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst)); 1.1073 + } 1.1074 + StopBenchmarkTiming(); 1.1075 + 1.1076 + delete[] dst; 1.1077 +} 1.1078 +BENCHMARK(BM_UFlat)->DenseRange(0, 17); 1.1079 + 1.1080 +static void BM_UValidate(int iters, int arg) { 1.1081 + StopBenchmarkTiming(); 1.1082 + 1.1083 + // Pick file to process based on "arg" 1.1084 + CHECK_GE(arg, 0); 1.1085 + CHECK_LT(arg, ARRAYSIZE(files)); 1.1086 + string contents = ReadTestDataFile(files[arg].filename); 1.1087 + 1.1088 + string zcontents; 1.1089 + snappy::Compress(contents.data(), contents.size(), &zcontents); 1.1090 + 1.1091 + SetBenchmarkBytesProcessed(static_cast<int64>(iters) * 1.1092 + static_cast<int64>(contents.size())); 1.1093 + SetBenchmarkLabel(files[arg].label); 1.1094 + StartBenchmarkTiming(); 1.1095 + while (iters-- > 0) { 1.1096 + CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size())); 1.1097 + } 1.1098 + StopBenchmarkTiming(); 1.1099 +} 1.1100 +BENCHMARK(BM_UValidate)->DenseRange(0, 4); 1.1101 + 1.1102 + 1.1103 +static void BM_ZFlat(int iters, int arg) { 1.1104 + StopBenchmarkTiming(); 1.1105 + 1.1106 + // Pick file to process based on "arg" 1.1107 + CHECK_GE(arg, 0); 1.1108 + CHECK_LT(arg, ARRAYSIZE(files)); 1.1109 + string contents = ReadTestDataFile(files[arg].filename); 1.1110 + 1.1111 + 
char* dst = new char[snappy::MaxCompressedLength(contents.size())]; 1.1112 + 1.1113 + SetBenchmarkBytesProcessed(static_cast<int64>(iters) * 1.1114 + static_cast<int64>(contents.size())); 1.1115 + StartBenchmarkTiming(); 1.1116 + 1.1117 + size_t zsize = 0; 1.1118 + while (iters-- > 0) { 1.1119 + snappy::RawCompress(contents.data(), contents.size(), dst, &zsize); 1.1120 + } 1.1121 + StopBenchmarkTiming(); 1.1122 + const double compression_ratio = 1.1123 + static_cast<double>(zsize) / std::max<size_t>(1, contents.size()); 1.1124 + SetBenchmarkLabel(StringPrintf("%s (%.2f %%)", 1.1125 + files[arg].label, 100.0 * compression_ratio)); 1.1126 + VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes", 1.1127 + files[arg].label, contents.size(), zsize); 1.1128 + delete[] dst; 1.1129 +} 1.1130 +BENCHMARK(BM_ZFlat)->DenseRange(0, 17); 1.1131 + 1.1132 + 1.1133 +} // namespace snappy 1.1134 + 1.1135 + 1.1136 +int main(int argc, char** argv) { 1.1137 + InitGoogle(argv[0], &argc, &argv, true); 1.1138 + File::Init(); 1.1139 + RunSpecifiedBenchmarks(); 1.1140 + 1.1141 + 1.1142 + if (argc >= 2) { 1.1143 + for (int arg = 1; arg < argc; arg++) { 1.1144 + if (FLAGS_write_compressed) { 1.1145 + CompressFile(argv[arg]); 1.1146 + } else if (FLAGS_write_uncompressed) { 1.1147 + UncompressFile(argv[arg]); 1.1148 + } else { 1.1149 + MeasureFile(argv[arg]); 1.1150 + } 1.1151 + } 1.1152 + return 0; 1.1153 + } 1.1154 + 1.1155 + return RUN_ALL_TESTS(); 1.1156 +}