1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/components/url-classifier/HashStore.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1051 @@ 1.4 +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +// Originally based on Chrome sources: 1.6 +// Copyright (c) 2010 The Chromium Authors. All rights reserved. 1.7 +// 1.8 +// Redistribution and use in source and binary forms, with or without 1.9 +// modification, are permitted provided that the following conditions are 1.10 +// met: 1.11 +// 1.12 +// * Redistributions of source code must retain the above copyright 1.13 +// notice, this list of conditions and the following disclaimer. 1.14 +// * Redistributions in binary form must reproduce the above 1.15 +// copyright notice, this list of conditions and the following disclaimer 1.16 +// in the documentation and/or other materials provided with the 1.17 +// distribution. 1.18 +// * Neither the name of Google Inc. nor the names of its 1.19 +// contributors may be used to endorse or promote products derived from 1.20 +// this software without specific prior written permission. 1.21 +// 1.22 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.23 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.24 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.25 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.26 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.27 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.28 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.29 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.30 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.31 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.32 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.33 + 1.34 + 1.35 +#include "HashStore.h" 1.36 +#include "nsAutoPtr.h" 1.37 +#include "nsICryptoHash.h" 1.38 +#include "nsISeekableStream.h" 1.39 +#include "nsIStreamConverterService.h" 1.40 +#include "nsNetUtil.h" 1.41 +#include "nsCheckSummedOutputStream.h" 1.42 +#include "prlog.h" 1.43 +#include "zlib.h" 1.44 + 1.45 +// Main store for SafeBrowsing protocol data. We store 1.46 +// known add/sub chunks, prefixes and completions in memory 1.47 +// during an update, and serialize to disk. 1.48 +// We do not store the add prefixes, those are retrieved by 1.49 +// decompressing the PrefixSet cache whenever we need to apply 1.50 +// an update. 1.51 +// 1.52 +// byte slicing: Many of the 4-byte values stored here are strongly 1.53 +// correlated in the upper bytes, and uncorrelated in the lower 1.54 +// bytes. Because zlib/DEFLATE requires match lengths of at least 1.55 +// 3 to achieve good compression, and we don't get those if only 1.56 +// the upper 16-bits are correlated, it is worthwhile to slice 32-bit 1.57 +// values into 4 1-byte slices and compress the slices individually. 1.58 +// The slices corresponding to MSBs will compress very well, and the 1.59 +// slice corresponding to LSB almost nothing. Because of this, we 1.60 +// only apply DEFLATE to the 3 most significant bytes, and store the 1.61 +// LSB uncompressed. 1.62 +// 1.63 +// byte sliced (numValues) data format: 1.64 +// uint32_t compressed-size 1.65 +// compressed-size bytes zlib DEFLATE data 1.66 +// 0...numValues byte MSB of 4-byte numValues data 1.67 +// uint32_t compressed-size 1.68 +// compressed-size bytes zlib DEFLATE data 1.69 +// 0...numValues byte 2nd byte of 4-byte numValues data 1.70 +// uint32_t compressed-size 1.71 +// compressed-size bytes zlib DEFLATE data 1.72 +// 0...numValues byte 3rd byte of 4-byte numValues data 1.73 +// 0...numValues byte LSB of 4-byte numValues data 1.74 +// 1.75 +// Store data format: 1.76 +// uint32_t magic 1.77 +// uint32_t version 1.78 +// uint32_t numAddChunks 1.79 +// uint32_t numSubChunks 1.80 +// uint32_t numAddPrefixes 1.81 +// uint32_t numSubPrefixes 1.82 +// uint32_t numAddCompletes 1.83 +// uint32_t numSubCompletes 1.84 +// 0...numAddChunks uint32_t addChunk 1.85 +// 0...numSubChunks uint32_t subChunk 1.86 +// byte sliced (numAddPrefixes) uint32_t add chunk of AddPrefixes 1.87 +// byte sliced (numSubPrefixes) uint32_t add chunk of SubPrefixes 1.88 +// byte sliced (numSubPrefixes) uint32_t sub chunk of SubPrefixes 1.89 +// byte sliced (numSubPrefixes) uint32_t SubPrefixes 1.90 +// 0...numAddCompletes 32-byte Completions + uint32_t addChunk 1.91 +// 0...numSubCompletes 32-byte Completions + uint32_t addChunk 1.92 +// + uint32_t subChunk 1.93 +// 16-byte MD5 of all preceding data 1.94 + 1.95 +// Name of the SafeBrowsing store 1.96 +#define STORE_SUFFIX ".sbstore" 1.97 + 1.98 +// NSPR_LOG_MODULES=UrlClassifierDbService:5 1.99 +extern PRLogModuleInfo *gUrlClassifierDbServiceLog; 1.100 +#if defined(PR_LOGGING) 1.101 +#define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args) 1.102 +#define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4) 1.103 +#else 1.104 +#define LOG(args) 1.105 +#define LOG_ENABLED() (false) 1.106 +#endif 1.107 + 1.108 +// Either the return was successful or we call the Reset function (unless we 1.109 +// hit an OOM). Used while reading in the store. 1.110 +#define SUCCESS_OR_RESET(res) \ 1.111 + do { \ 1.112 + nsresult __rv = res; /* Don't evaluate |res| more than once */ \ 1.113 + if (__rv == NS_ERROR_OUT_OF_MEMORY) { \ 1.114 + NS_WARNING("SafeBrowsing OOM."); \ 1.115 + return __rv; \ 1.116 + } \ 1.117 + if (NS_FAILED(__rv)) { \ 1.118 + NS_WARNING("SafeBrowsing store corrupted or out of date."); \ 1.119 + Reset(); \ 1.120 + return __rv; \ 1.121 + } \ 1.122 + } while(0) 1.123 + 1.124 +namespace mozilla { 1.125 +namespace safebrowsing { 1.126 + 1.127 +const uint32_t STORE_MAGIC = 0x1231af3b; 1.128 +const uint32_t CURRENT_VERSION = 3; 1.129 + 1.130 +void 1.131 +TableUpdate::NewAddPrefix(uint32_t aAddChunk, const Prefix& aHash) 1.132 +{ 1.133 + AddPrefix *add = mAddPrefixes.AppendElement(); 1.134 + add->addChunk = aAddChunk; 1.135 + add->prefix = aHash; 1.136 +} 1.137 + 1.138 +void 1.139 +TableUpdate::NewSubPrefix(uint32_t aAddChunk, const Prefix& aHash, uint32_t aSubChunk) 1.140 +{ 1.141 + SubPrefix *sub = mSubPrefixes.AppendElement(); 1.142 + sub->addChunk = aAddChunk; 1.143 + sub->prefix = aHash; 1.144 + sub->subChunk = aSubChunk; 1.145 +} 1.146 + 1.147 +void 1.148 +TableUpdate::NewAddComplete(uint32_t aAddChunk, const Completion& aHash) 1.149 +{ 1.150 + AddComplete *add = mAddCompletes.AppendElement(); 1.151 + add->addChunk = aAddChunk; 1.152 + add->complete = aHash; 1.153 +} 1.154 + 1.155 +void 1.156 +TableUpdate::NewSubComplete(uint32_t aAddChunk, const Completion& aHash, uint32_t aSubChunk) 1.157 +{ 1.158 + SubComplete *sub = mSubCompletes.AppendElement(); 1.159 + sub->addChunk = aAddChunk; 1.160 + sub->complete = aHash; 1.161 + sub->subChunk = aSubChunk; 1.162 +} 1.163 + 1.164 + 1.165 +HashStore::HashStore(const nsACString& aTableName, nsIFile* aStoreDir) 1.166 + : mTableName(aTableName) 1.167 + , mStoreDirectory(aStoreDir) 1.168 + , mInUpdate(false) 1.169 +{ 1.170 +} 1.171 + 1.172 +HashStore::~HashStore() 1.173 +{ 1.174 +} 1.175 + 1.176 +nsresult 1.177 +HashStore::Reset() 1.178 +{ 1.179 + LOG(("HashStore resetting")); 1.180 + 1.181 + nsCOMPtr<nsIFile> storeFile; 1.182 + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); 1.183 + NS_ENSURE_SUCCESS(rv, rv); 1.184 + 1.185 + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(STORE_SUFFIX)); 1.186 + NS_ENSURE_SUCCESS(rv, rv); 1.187 + 1.188 + rv = storeFile->Remove(false); 1.189 + NS_ENSURE_SUCCESS(rv, rv); 1.190 + 1.191 + return NS_OK; 1.192 +} 1.193 + 1.194 +nsresult 1.195 +HashStore::CheckChecksum(nsIFile* aStoreFile, 1.196 + uint32_t aFileSize) 1.197 +{ 1.198 + // Check for file corruption by 1.199 + // comparing the stored checksum to actual checksum of data 1.200 + nsAutoCString hash; 1.201 + nsAutoCString compareHash; 1.202 + char *data; 1.203 + uint32_t read; 1.204 + 1.205 + nsresult rv = CalculateChecksum(hash, aFileSize, true); 1.206 + NS_ENSURE_SUCCESS(rv, rv); 1.207 + 1.208 + compareHash.GetMutableData(&data, hash.Length()); 1.209 + 1.210 + if (hash.Length() > aFileSize) { 1.211 + NS_WARNING("SafeBrowing file not long enough to store its hash"); 1.212 + return NS_ERROR_FAILURE; 1.213 + } 1.214 + nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(mInputStream); 1.215 + rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET, aFileSize - hash.Length()); 1.216 + NS_ENSURE_SUCCESS(rv, rv); 1.217 + 1.218 + rv = mInputStream->Read(data, hash.Length(), &read); 1.219 + NS_ENSURE_SUCCESS(rv, rv); 1.220 + NS_ASSERTION(read == hash.Length(), "Could not read hash bytes"); 1.221 + 1.222 + if (!hash.Equals(compareHash)) { 1.223 + NS_WARNING("Safebrowing file failed checksum."); 1.224 + return NS_ERROR_FAILURE; 1.225 + } 1.226 + 1.227 + return NS_OK; 1.228 +} 1.229 + 1.230 +nsresult 1.231 +HashStore::Open() 1.232 +{ 1.233 + nsCOMPtr<nsIFile> storeFile; 1.234 + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); 1.235 + NS_ENSURE_SUCCESS(rv, rv); 1.236 + 1.237 + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore")); 1.238 + NS_ENSURE_SUCCESS(rv, rv); 1.239 + 1.240 + nsCOMPtr<nsIInputStream> origStream; 1.241 + rv = NS_NewLocalFileInputStream(getter_AddRefs(origStream), storeFile, 1.242 + PR_RDONLY | nsIFile::OS_READAHEAD); 1.243 + 1.244 + if (rv == NS_ERROR_FILE_NOT_FOUND) { 1.245 + UpdateHeader(); 1.246 + return NS_OK; 1.247 + } else { 1.248 + SUCCESS_OR_RESET(rv); 1.249 + } 1.250 + 1.251 + int64_t fileSize; 1.252 + rv = storeFile->GetFileSize(&fileSize); 1.253 + NS_ENSURE_SUCCESS(rv, rv); 1.254 + 1.255 + if (fileSize < 0 || fileSize > UINT32_MAX) { 1.256 + return NS_ERROR_FAILURE; 1.257 + } 1.258 + 1.259 + uint32_t fileSize32 = static_cast<uint32_t>(fileSize); 1.260 + 1.261 + rv = NS_NewBufferedInputStream(getter_AddRefs(mInputStream), origStream, 1.262 + fileSize32); 1.263 + NS_ENSURE_SUCCESS(rv, rv); 1.264 + 1.265 + rv = CheckChecksum(storeFile, fileSize32); 1.266 + SUCCESS_OR_RESET(rv); 1.267 + 1.268 + rv = ReadHeader(); 1.269 + SUCCESS_OR_RESET(rv); 1.270 + 1.271 + rv = SanityCheck(); 1.272 + SUCCESS_OR_RESET(rv); 1.273 + 1.274 + rv = ReadChunkNumbers(); 1.275 + SUCCESS_OR_RESET(rv); 1.276 + 1.277 + return NS_OK; 1.278 +} 1.279 + 1.280 +nsresult 1.281 +HashStore::ReadHeader() 1.282 +{ 1.283 + if (!mInputStream) { 1.284 + UpdateHeader(); 1.285 + return NS_OK; 1.286 + } 1.287 + 1.288 + nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream); 1.289 + nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0); 1.290 + NS_ENSURE_SUCCESS(rv, rv); 1.291 + 1.292 + void *buffer = &mHeader; 1.293 + rv = NS_ReadInputStreamToBuffer(mInputStream, 1.294 + &buffer, 1.295 + sizeof(Header)); 1.296 + NS_ENSURE_SUCCESS(rv, rv); 1.297 + 1.298 + return NS_OK; 1.299 +} 1.300 + 1.301 +nsresult 1.302 +HashStore::SanityCheck() 1.303 +{ 1.304 + if (mHeader.magic != STORE_MAGIC || mHeader.version != CURRENT_VERSION) { 1.305 + NS_WARNING("Unexpected header data in the store."); 1.306 + return NS_ERROR_FAILURE; 1.307 + } 1.308 + 1.309 + return NS_OK; 1.310 +} 1.311 + 1.312 +nsresult 1.313 +HashStore::CalculateChecksum(nsAutoCString& aChecksum, 1.314 + uint32_t aFileSize, 1.315 + bool aChecksumPresent) 1.316 +{ 1.317 + aChecksum.Truncate(); 1.318 + 1.319 + // Reset mInputStream to start 1.320 + nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream); 1.321 + nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0); 1.322 + 1.323 + nsCOMPtr<nsICryptoHash> hash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv); 1.324 + NS_ENSURE_SUCCESS(rv, rv); 1.325 + 1.326 + // Size of MD5 hash in bytes 1.327 + const uint32_t CHECKSUM_SIZE = 16; 1.328 + 1.329 + // MD5 is not a secure hash function, but since this is a filesystem integrity 1.330 + // check, this usage is ok. 1.331 + rv = hash->Init(nsICryptoHash::MD5); 1.332 + NS_ENSURE_SUCCESS(rv, rv); 1.333 + 1.334 + if (!aChecksumPresent) { 1.335 + // Hash entire file 1.336 + rv = hash->UpdateFromStream(mInputStream, UINT32_MAX); 1.337 + } else { 1.338 + // Hash everything but last checksum bytes 1.339 + if (aFileSize < CHECKSUM_SIZE) { 1.340 + NS_WARNING("SafeBrowsing file isn't long enough to store its checksum"); 1.341 + return NS_ERROR_FAILURE; 1.342 + } 1.343 + rv = hash->UpdateFromStream(mInputStream, aFileSize - CHECKSUM_SIZE); 1.344 + } 1.345 + NS_ENSURE_SUCCESS(rv, rv); 1.346 + 1.347 + rv = hash->Finish(false, aChecksum); 1.348 + NS_ENSURE_SUCCESS(rv, rv); 1.349 + 1.350 + return NS_OK; 1.351 +} 1.352 + 1.353 +void 1.354 +HashStore::UpdateHeader() 1.355 +{ 1.356 + mHeader.magic = STORE_MAGIC; 1.357 + mHeader.version = CURRENT_VERSION; 1.358 + 1.359 + mHeader.numAddChunks = mAddChunks.Length(); 1.360 + mHeader.numSubChunks = mSubChunks.Length(); 1.361 + mHeader.numAddPrefixes = mAddPrefixes.Length(); 1.362 + mHeader.numSubPrefixes = mSubPrefixes.Length(); 1.363 + mHeader.numAddCompletes = mAddCompletes.Length(); 1.364 + mHeader.numSubCompletes = mSubCompletes.Length(); 1.365 +} 1.366 + 1.367 +nsresult 1.368 +HashStore::ReadChunkNumbers() 1.369 +{ 1.370 + NS_ENSURE_STATE(mInputStream); 1.371 + 1.372 + nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream); 1.373 + nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 1.374 + sizeof(Header)); 1.375 + 1.376 + rv = mAddChunks.Read(mInputStream, mHeader.numAddChunks); 1.377 + NS_ENSURE_SUCCESS(rv, rv); 1.378 + NS_ASSERTION(mAddChunks.Length() == mHeader.numAddChunks, "Read the right amount of add chunks."); 1.379 + 1.380 + rv = mSubChunks.Read(mInputStream, mHeader.numSubChunks); 1.381 + NS_ENSURE_SUCCESS(rv, rv); 1.382 + NS_ASSERTION(mSubChunks.Length() == mHeader.numSubChunks, "Read the right amount of sub chunks."); 1.383 + 1.384 + return NS_OK; 1.385 +} 1.386 + 1.387 +nsresult 1.388 +HashStore::ReadHashes() 1.389 +{ 1.390 + if (!mInputStream) { 1.391 + // BeginUpdate has been called but Open hasn't initialized mInputStream, 1.392 + // because the existing HashStore is empty. 1.393 + return NS_OK; 1.394 + } 1.395 + 1.396 + nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream); 1.397 + 1.398 + uint32_t offset = sizeof(Header); 1.399 + offset += (mHeader.numAddChunks + mHeader.numSubChunks) * sizeof(uint32_t); 1.400 + nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, offset); 1.401 + 1.402 + rv = ReadAddPrefixes(); 1.403 + NS_ENSURE_SUCCESS(rv, rv); 1.404 + 1.405 + rv = ReadSubPrefixes(); 1.406 + NS_ENSURE_SUCCESS(rv, rv); 1.407 + 1.408 + rv = ReadTArray(mInputStream, &mAddCompletes, mHeader.numAddCompletes); 1.409 + NS_ENSURE_SUCCESS(rv, rv); 1.410 + 1.411 + rv = ReadTArray(mInputStream, &mSubCompletes, mHeader.numSubCompletes); 1.412 + NS_ENSURE_SUCCESS(rv, rv); 1.413 + 1.414 + return NS_OK; 1.415 +} 1.416 + 1.417 +nsresult 1.418 +HashStore::BeginUpdate() 1.419 +{ 1.420 + // Read the rest of the store in memory. 1.421 + nsresult rv = ReadHashes(); 1.422 + SUCCESS_OR_RESET(rv); 1.423 + 1.424 + // Close input stream, won't be needed any more and 1.425 + // we will rewrite ourselves. 1.426 + if (mInputStream) { 1.427 + rv = mInputStream->Close(); 1.428 + NS_ENSURE_SUCCESS(rv, rv); 1.429 + } 1.430 + 1.431 + mInUpdate = true; 1.432 + 1.433 + return NS_OK; 1.434 +} 1.435 + 1.436 +template<class T> 1.437 +static nsresult 1.438 +Merge(ChunkSet* aStoreChunks, 1.439 + FallibleTArray<T>* aStorePrefixes, 1.440 + ChunkSet& aUpdateChunks, 1.441 + FallibleTArray<T>& aUpdatePrefixes, 1.442 + bool aAllowMerging = false) 1.443 +{ 1.444 + EntrySort(aUpdatePrefixes); 1.445 + 1.446 + T* updateIter = aUpdatePrefixes.Elements(); 1.447 + T* updateEnd = aUpdatePrefixes.Elements() + aUpdatePrefixes.Length(); 1.448 + 1.449 + T* storeIter = aStorePrefixes->Elements(); 1.450 + T* storeEnd = aStorePrefixes->Elements() + aStorePrefixes->Length(); 1.451 + 1.452 + // use a separate array so we can keep the iterators valid 1.453 + // if the nsTArray grows 1.454 + nsTArray<T> adds; 1.455 + 1.456 + for (; updateIter != updateEnd; updateIter++) { 1.457 + // skip this chunk if we already have it, unless we're 1.458 + // merging completions, in which case we'll always already 1.459 + // have the chunk from the original prefix 1.460 + if (aStoreChunks->Has(updateIter->Chunk())) 1.461 + if (!aAllowMerging) 1.462 + continue; 1.463 + // XXX: binary search for insertion point might be faster in common 1.464 + // case? 1.465 + while (storeIter < storeEnd && (storeIter->Compare(*updateIter) < 0)) { 1.466 + // skip forward to matching element (or not...) 1.467 + storeIter++; 1.468 + } 1.469 + // no match, add 1.470 + if (storeIter == storeEnd 1.471 + || storeIter->Compare(*updateIter) != 0) { 1.472 + if (!adds.AppendElement(*updateIter)) 1.473 + return NS_ERROR_OUT_OF_MEMORY; 1.474 + } 1.475 + } 1.476 + 1.477 + // Chunks can be empty, but we should still report we have them 1.478 + // to make the chunkranges continuous. 1.479 + aStoreChunks->Merge(aUpdateChunks); 1.480 + 1.481 + aStorePrefixes->AppendElements(adds); 1.482 + EntrySort(*aStorePrefixes); 1.483 + 1.484 + return NS_OK; 1.485 +} 1.486 + 1.487 +nsresult 1.488 +HashStore::ApplyUpdate(TableUpdate &update) 1.489 +{ 1.490 + nsresult rv = mAddExpirations.Merge(update.AddExpirations()); 1.491 + NS_ENSURE_SUCCESS(rv, rv); 1.492 + 1.493 + rv = mSubExpirations.Merge(update.SubExpirations()); 1.494 + NS_ENSURE_SUCCESS(rv, rv); 1.495 + 1.496 + rv = Expire(); 1.497 + NS_ENSURE_SUCCESS(rv, rv); 1.498 + 1.499 + rv = Merge(&mAddChunks, &mAddPrefixes, 1.500 + update.AddChunks(), update.AddPrefixes()); 1.501 + NS_ENSURE_SUCCESS(rv, rv); 1.502 + 1.503 + rv = Merge(&mAddChunks, &mAddCompletes, 1.504 + update.AddChunks(), update.AddCompletes(), true); 1.505 + NS_ENSURE_SUCCESS(rv, rv); 1.506 + 1.507 + rv = Merge(&mSubChunks, &mSubPrefixes, 1.508 + update.SubChunks(), update.SubPrefixes()); 1.509 + NS_ENSURE_SUCCESS(rv, rv); 1.510 + 1.511 + rv = Merge(&mSubChunks, &mSubCompletes, 1.512 + update.SubChunks(), update.SubCompletes(), true); 1.513 + NS_ENSURE_SUCCESS(rv, rv); 1.514 + 1.515 + return NS_OK; 1.516 +} 1.517 + 1.518 +nsresult 1.519 +HashStore::Rebuild() 1.520 +{ 1.521 + NS_ASSERTION(mInUpdate, "Must be in update to rebuild."); 1.522 + 1.523 + nsresult rv = ProcessSubs(); 1.524 + NS_ENSURE_SUCCESS(rv, rv); 1.525 + 1.526 + UpdateHeader(); 1.527 + 1.528 + return NS_OK; 1.529 +} 1.530 + 1.531 +void 1.532 +HashStore::ClearCompletes() 1.533 +{ 1.534 + NS_ASSERTION(mInUpdate, "Must be in update to clear completes."); 1.535 + 1.536 + mAddCompletes.Clear(); 1.537 + mSubCompletes.Clear(); 1.538 + 1.539 + UpdateHeader(); 1.540 +} 1.541 + 1.542 +template<class T> 1.543 +static void 1.544 +ExpireEntries(FallibleTArray<T>* aEntries, ChunkSet& aExpirations) 1.545 +{ 1.546 + T* addIter = aEntries->Elements(); 1.547 + T* end = aEntries->Elements() + aEntries->Length(); 1.548 + 1.549 + for (T *iter = addIter; iter != end; iter++) { 1.550 + if (!aExpirations.Has(iter->Chunk())) { 1.551 + *addIter = *iter; 1.552 + addIter++; 1.553 + } 1.554 + } 1.555 + 1.556 + aEntries->SetLength(addIter - aEntries->Elements()); 1.557 +} 1.558 + 1.559 +nsresult 1.560 +HashStore::Expire() 1.561 +{ 1.562 + ExpireEntries(&mAddPrefixes, mAddExpirations); 1.563 + ExpireEntries(&mAddCompletes, mAddExpirations); 1.564 + ExpireEntries(&mSubPrefixes, mSubExpirations); 1.565 + ExpireEntries(&mSubCompletes, mSubExpirations); 1.566 + 1.567 + mAddChunks.Remove(mAddExpirations); 1.568 + mSubChunks.Remove(mSubExpirations); 1.569 + 1.570 + mAddExpirations.Clear(); 1.571 + mSubExpirations.Clear(); 1.572 + 1.573 + return NS_OK; 1.574 +} 1.575 + 1.576 +template<class T> 1.577 +nsresult DeflateWriteTArray(nsIOutputStream* aStream, nsTArray<T>& aIn) 1.578 +{ 1.579 + uLongf insize = aIn.Length() * sizeof(T); 1.580 + uLongf outsize = compressBound(insize); 1.581 + FallibleTArray<char> outBuff; 1.582 + if (!outBuff.SetLength(outsize)) { 1.583 + return NS_ERROR_OUT_OF_MEMORY; 1.584 + } 1.585 + 1.586 + int zerr = compress(reinterpret_cast<Bytef*>(outBuff.Elements()), 1.587 + &outsize, 1.588 + reinterpret_cast<const Bytef*>(aIn.Elements()), 1.589 + insize); 1.590 + if (zerr != Z_OK) { 1.591 + return NS_ERROR_FAILURE; 1.592 + } 1.593 + LOG(("DeflateWriteTArray: %d in %d out", insize, outsize)); 1.594 + 1.595 + outBuff.TruncateLength(outsize); 1.596 + 1.597 + // Length of compressed data stream 1.598 + uint32_t dataLen = outBuff.Length(); 1.599 + uint32_t written; 1.600 + nsresult rv = aStream->Write(reinterpret_cast<char*>(&dataLen), sizeof(dataLen), &written); 1.601 + NS_ENSURE_SUCCESS(rv, rv); 1.602 + 1.603 + NS_ASSERTION(written == sizeof(dataLen), "Error writing deflate length"); 1.604 + 1.605 + // Store to stream 1.606 + rv = WriteTArray(aStream, outBuff); 1.607 + NS_ENSURE_SUCCESS(rv, rv); 1.608 + 1.609 + return NS_OK; 1.610 +} 1.611 + 1.612 +template<class T> 1.613 +nsresult InflateReadTArray(nsIInputStream* aStream, FallibleTArray<T>* aOut, 1.614 + uint32_t aExpectedSize) 1.615 +{ 1.616 + 1.617 + uint32_t inLen; 1.618 + uint32_t read; 1.619 + nsresult rv = aStream->Read(reinterpret_cast<char*>(&inLen), sizeof(inLen), &read); 1.620 + NS_ENSURE_SUCCESS(rv, rv); 1.621 + 1.622 + NS_ASSERTION(read == sizeof(inLen), "Error reading inflate length"); 1.623 + 1.624 + FallibleTArray<char> inBuff; 1.625 + if (!inBuff.SetLength(inLen)) { 1.626 + return NS_ERROR_OUT_OF_MEMORY; 1.627 + } 1.628 + 1.629 + rv = ReadTArray(aStream, &inBuff, inLen); 1.630 + NS_ENSURE_SUCCESS(rv, rv); 1.631 + 1.632 + uLongf insize = inLen; 1.633 + uLongf outsize = aExpectedSize * sizeof(T); 1.634 + if (!aOut->SetLength(aExpectedSize)) { 1.635 + return NS_ERROR_OUT_OF_MEMORY; 1.636 + } 1.637 + 1.638 + int zerr = uncompress(reinterpret_cast<Bytef*>(aOut->Elements()), 1.639 + &outsize, 1.640 + reinterpret_cast<const Bytef*>(inBuff.Elements()), 1.641 + insize); 1.642 + if (zerr != Z_OK) { 1.643 + return NS_ERROR_FAILURE; 1.644 + } 1.645 + LOG(("InflateReadTArray: %d in %d out", insize, outsize)); 1.646 + 1.647 + NS_ASSERTION(outsize == aExpectedSize * sizeof(T), "Decompression size mismatch"); 1.648 + 1.649 + return NS_OK; 1.650 +} 1.651 + 1.652 +static nsresult 1.653 +ByteSliceWrite(nsIOutputStream* aOut, nsTArray<uint32_t>& aData) 1.654 +{ 1.655 + nsTArray<uint8_t> slice1; 1.656 + nsTArray<uint8_t> slice2; 1.657 + nsTArray<uint8_t> slice3; 1.658 + nsTArray<uint8_t> slice4; 1.659 + uint32_t count = aData.Length(); 1.660 + 1.661 + slice1.SetCapacity(count); 1.662 + slice2.SetCapacity(count); 1.663 + slice3.SetCapacity(count); 1.664 + slice4.SetCapacity(count); 1.665 + 1.666 + for (uint32_t i = 0; i < count; i++) { 1.667 + slice1.AppendElement( aData[i] >> 24); 1.668 + slice2.AppendElement((aData[i] >> 16) & 0xFF); 1.669 + slice3.AppendElement((aData[i] >> 8) & 0xFF); 1.670 + slice4.AppendElement( aData[i] & 0xFF); 1.671 + } 1.672 + 1.673 + nsresult rv = DeflateWriteTArray(aOut, slice1); 1.674 + NS_ENSURE_SUCCESS(rv, rv); 1.675 + rv = DeflateWriteTArray(aOut, slice2); 1.676 + NS_ENSURE_SUCCESS(rv, rv); 1.677 + rv = DeflateWriteTArray(aOut, slice3); 1.678 + NS_ENSURE_SUCCESS(rv, rv); 1.679 + // The LSB slice is generally uncompressible, don't bother 1.680 + // compressing it. 1.681 + rv = WriteTArray(aOut, slice4); 1.682 + NS_ENSURE_SUCCESS(rv, rv); 1.683 + 1.684 + return NS_OK; 1.685 +} 1.686 + 1.687 +static nsresult 1.688 +ByteSliceRead(nsIInputStream* aInStream, FallibleTArray<uint32_t>* aData, uint32_t count) 1.689 +{ 1.690 + FallibleTArray<uint8_t> slice1; 1.691 + FallibleTArray<uint8_t> slice2; 1.692 + FallibleTArray<uint8_t> slice3; 1.693 + FallibleTArray<uint8_t> slice4; 1.694 + 1.695 + nsresult rv = InflateReadTArray(aInStream, &slice1, count); 1.696 + NS_ENSURE_SUCCESS(rv, rv); 1.697 + 1.698 + rv = InflateReadTArray(aInStream, &slice2, count); 1.699 + NS_ENSURE_SUCCESS(rv, rv); 1.700 + 1.701 + rv = InflateReadTArray(aInStream, &slice3, count); 1.702 + NS_ENSURE_SUCCESS(rv, rv); 1.703 + 1.704 + rv = ReadTArray(aInStream, &slice4, count); 1.705 + NS_ENSURE_SUCCESS(rv, rv); 1.706 + 1.707 + if (!aData->SetCapacity(count)) { 1.708 + return NS_ERROR_OUT_OF_MEMORY; 1.709 + } 1.710 + 1.711 + for (uint32_t i = 0; i < count; i++) { 1.712 + aData->AppendElement((slice1[i] << 24) | (slice2[i] << 16) 1.713 + | (slice3[i] << 8) | (slice4[i])); 1.714 + } 1.715 + 1.716 + return NS_OK; 1.717 +} 1.718 + 1.719 +nsresult 1.720 +HashStore::ReadAddPrefixes() 1.721 +{ 1.722 + FallibleTArray<uint32_t> chunks; 1.723 + uint32_t count = mHeader.numAddPrefixes; 1.724 + 1.725 + nsresult rv = ByteSliceRead(mInputStream, &chunks, count); 1.726 + NS_ENSURE_SUCCESS(rv, rv); 1.727 + 1.728 + if (!mAddPrefixes.SetCapacity(count)) { 1.729 + return NS_ERROR_OUT_OF_MEMORY; 1.730 + } 1.731 + for (uint32_t i = 0; i < count; i++) { 1.732 + AddPrefix *add = mAddPrefixes.AppendElement(); 1.733 + add->prefix.FromUint32(0); 1.734 + add->addChunk = chunks[i]; 1.735 + } 1.736 + 1.737 + return NS_OK; 1.738 +} 1.739 + 1.740 +nsresult 1.741 +HashStore::ReadSubPrefixes() 1.742 +{ 1.743 + FallibleTArray<uint32_t> addchunks; 1.744 + FallibleTArray<uint32_t> subchunks; 1.745 + FallibleTArray<uint32_t> prefixes; 1.746 + uint32_t count = mHeader.numSubPrefixes; 1.747 + 1.748 + nsresult rv = ByteSliceRead(mInputStream, &addchunks, count); 1.749 + NS_ENSURE_SUCCESS(rv, rv); 1.750 + 1.751 + rv = ByteSliceRead(mInputStream, &subchunks, count); 1.752 + NS_ENSURE_SUCCESS(rv, rv); 1.753 + 1.754 + rv = ByteSliceRead(mInputStream, &prefixes, count); 1.755 + NS_ENSURE_SUCCESS(rv, rv); 1.756 + 1.757 + if (!mSubPrefixes.SetCapacity(count)) { 1.758 + return NS_ERROR_OUT_OF_MEMORY; 1.759 + } 1.760 + for (uint32_t i = 0; i < count; i++) { 1.761 + SubPrefix *sub = mSubPrefixes.AppendElement(); 1.762 + sub->addChunk = addchunks[i]; 1.763 + sub->prefix.FromUint32(prefixes[i]); 1.764 + sub->subChunk = subchunks[i]; 1.765 + } 1.766 + 1.767 + return NS_OK; 1.768 +} 1.769 + 1.770 +// Split up PrefixArray back into the constituents 1.771 +nsresult 1.772 +HashStore::WriteAddPrefixes(nsIOutputStream* aOut) 1.773 +{ 1.774 + nsTArray<uint32_t> chunks; 1.775 + uint32_t count = mAddPrefixes.Length(); 1.776 + chunks.SetCapacity(count); 1.777 + 1.778 + for (uint32_t i = 0; i < count; i++) { 1.779 + chunks.AppendElement(mAddPrefixes[i].Chunk()); 1.780 + } 1.781 + 1.782 + nsresult rv = ByteSliceWrite(aOut, chunks); 1.783 + NS_ENSURE_SUCCESS(rv, rv); 1.784 + 1.785 + return NS_OK; 1.786 +} 1.787 + 1.788 +nsresult 1.789 +HashStore::WriteSubPrefixes(nsIOutputStream* aOut) 1.790 +{ 1.791 + nsTArray<uint32_t> addchunks; 1.792 + nsTArray<uint32_t> subchunks; 1.793 + nsTArray<uint32_t> prefixes; 1.794 + uint32_t count = mSubPrefixes.Length(); 1.795 + addchunks.SetCapacity(count); 1.796 + subchunks.SetCapacity(count); 1.797 + prefixes.SetCapacity(count); 1.798 + 1.799 + for (uint32_t i = 0; i < count; i++) { 1.800 + addchunks.AppendElement(mSubPrefixes[i].AddChunk()); 1.801 + prefixes.AppendElement(mSubPrefixes[i].PrefixHash().ToUint32()); 1.802 + subchunks.AppendElement(mSubPrefixes[i].Chunk()); 1.803 + } 1.804 + 1.805 + nsresult rv = ByteSliceWrite(aOut, addchunks); 1.806 + NS_ENSURE_SUCCESS(rv, rv); 1.807 + 1.808 + rv = ByteSliceWrite(aOut, subchunks); 1.809 + NS_ENSURE_SUCCESS(rv, rv); 1.810 + 1.811 + rv = ByteSliceWrite(aOut, prefixes); 1.812 + NS_ENSURE_SUCCESS(rv, rv); 1.813 + 1.814 + return NS_OK; 1.815 +} 1.816 + 1.817 +nsresult 1.818 +HashStore::WriteFile() 1.819 +{ 1.820 + NS_ASSERTION(mInUpdate, "Must be in update to write database."); 1.821 + 1.822 + nsCOMPtr<nsIFile> storeFile; 1.823 + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile)); 1.824 + NS_ENSURE_SUCCESS(rv, rv); 1.825 + rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore")); 1.826 + NS_ENSURE_SUCCESS(rv, rv); 1.827 + 1.828 + nsCOMPtr<nsIOutputStream> out; 1.829 + rv = NS_NewCheckSummedOutputStream(getter_AddRefs(out), storeFile, 1.830 + PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE); 1.831 + NS_ENSURE_SUCCESS(rv, rv); 1.832 + 1.833 + uint32_t written; 1.834 + rv = out->Write(reinterpret_cast<char*>(&mHeader), sizeof(mHeader), &written); 1.835 + NS_ENSURE_SUCCESS(rv, rv); 1.836 + 1.837 + // Write chunk numbers. 1.838 + rv = mAddChunks.Write(out); 1.839 + NS_ENSURE_SUCCESS(rv, rv); 1.840 + 1.841 + rv = mSubChunks.Write(out); 1.842 + NS_ENSURE_SUCCESS(rv, rv); 1.843 + 1.844 + // Write hashes. 1.845 + rv = WriteAddPrefixes(out); 1.846 + NS_ENSURE_SUCCESS(rv, rv); 1.847 + 1.848 + rv = WriteSubPrefixes(out); 1.849 + NS_ENSURE_SUCCESS(rv, rv); 1.850 + 1.851 + rv = WriteTArray(out, mAddCompletes); 1.852 + NS_ENSURE_SUCCESS(rv, rv); 1.853 + 1.854 + rv = WriteTArray(out, mSubCompletes); 1.855 + NS_ENSURE_SUCCESS(rv, rv); 1.856 + 1.857 + nsCOMPtr<nsISafeOutputStream> safeOut = do_QueryInterface(out, &rv); 1.858 + NS_ENSURE_SUCCESS(rv, rv); 1.859 + 1.860 + rv = safeOut->Finish(); 1.861 + NS_ENSURE_SUCCESS(rv, rv); 1.862 + 1.863 + return NS_OK; 1.864 +} 1.865 + 1.866 +template <class T> 1.867 +static void 1.868 +Erase(FallibleTArray<T>* array, T* iterStart, T* iterEnd) 1.869 +{ 1.870 + uint32_t start = iterStart - array->Elements(); 1.871 + uint32_t count = iterEnd - iterStart; 1.872 + 1.873 + if (count > 0) { 1.874 + array->RemoveElementsAt(start, count); 1.875 + } 1.876 +} 1.877 + 1.878 +// Find items matching between |subs| and |adds|, and remove them, 1.879 +// recording the item from |adds| in |adds_removed|. To minimize 1.880 +// copies, the inputs are processing in parallel, so |subs| and |adds| 1.881 +// should be compatibly ordered (either by SBAddPrefixLess or 1.882 +// SBAddPrefixHashLess). 1.883 +// 1.884 +// |predAS| provides add < sub, |predSA| provides sub < add, for the 1.885 +// tightest compare appropriate (see calls in SBProcessSubs). 1.886 +template<class TSub, class TAdd> 1.887 +static void 1.888 +KnockoutSubs(FallibleTArray<TSub>* aSubs, FallibleTArray<TAdd>* aAdds) 1.889 +{ 1.890 + // Keep a pair of output iterators for writing kept items. Due to 1.891 + // deletions, these may lag the main iterators. Using erase() on 1.892 + // individual items would result in O(N^2) copies. Using a list 1.893 + // would work around that, at double or triple the memory cost. 1.894 + TAdd* addOut = aAdds->Elements(); 1.895 + TAdd* addIter = aAdds->Elements(); 1.896 + 1.897 + TSub* subOut = aSubs->Elements(); 1.898 + TSub* subIter = aSubs->Elements(); 1.899 + 1.900 + TAdd* addEnd = addIter + aAdds->Length(); 1.901 + TSub* subEnd = subIter + aSubs->Length(); 1.902 + 1.903 + while (addIter != addEnd && subIter != subEnd) { 1.904 + // additer compare, so it compares on add chunk 1.905 + int32_t cmp = addIter->Compare(*subIter); 1.906 + if (cmp > 0) { 1.907 + // If |*sub_iter| < |*add_iter|, retain the sub. 1.908 + *subOut = *subIter; 1.909 + ++subOut; 1.910 + ++subIter; 1.911 + } else if (cmp < 0) { 1.912 + // If |*add_iter| < |*sub_iter|, retain the add. 1.913 + *addOut = *addIter; 1.914 + ++addOut; 1.915 + ++addIter; 1.916 + } else { 1.917 + // Drop equal items 1.918 + ++addIter; 1.919 + ++subIter; 1.920 + } 1.921 + } 1.922 + 1.923 + Erase(aAdds, addOut, addIter); 1.924 + Erase(aSubs, subOut, subIter); 1.925 +} 1.926 + 1.927 +// Remove items in |removes| from |fullHashes|. |fullHashes| and 1.928 +// |removes| should be ordered by SBAddPrefix component. 1.929 +template <class T> 1.930 +static void 1.931 +RemoveMatchingPrefixes(const SubPrefixArray& aSubs, FallibleTArray<T>* aFullHashes) 1.932 +{ 1.933 + // Where to store kept items. 1.934 + T* out = aFullHashes->Elements(); 1.935 + T* hashIter = out; 1.936 + T* hashEnd = aFullHashes->Elements() + aFullHashes->Length(); 1.937 + 1.938 + SubPrefix const * removeIter = aSubs.Elements(); 1.939 + SubPrefix const * removeEnd = aSubs.Elements() + aSubs.Length(); 1.940 + 1.941 + while (hashIter != hashEnd && removeIter != removeEnd) { 1.942 + int32_t cmp = removeIter->CompareAlt(*hashIter); 1.943 + if (cmp > 0) { 1.944 + // Keep items less than |*removeIter|. 1.945 + *out = *hashIter; 1.946 + ++out; 1.947 + ++hashIter; 1.948 + } else if (cmp < 0) { 1.949 + // No hit for |*removeIter|, bump it forward. 1.950 + ++removeIter; 1.951 + } else { 1.952 + // Drop equal items, there may be multiple hits. 1.953 + do { 1.954 + ++hashIter; 1.955 + } while (hashIter != hashEnd && 1.956 + !(removeIter->CompareAlt(*hashIter) < 0)); 1.957 + ++removeIter; 1.958 + } 1.959 + } 1.960 + Erase(aFullHashes, out, hashIter); 1.961 +} 1.962 + 1.963 +static void 1.964 +RemoveDeadSubPrefixes(SubPrefixArray& aSubs, ChunkSet& aAddChunks) 1.965 +{ 1.966 + SubPrefix * subIter = aSubs.Elements(); 1.967 + SubPrefix * subEnd = aSubs.Elements() + aSubs.Length(); 1.968 + 1.969 + for (SubPrefix * iter = subIter; iter != subEnd; iter++) { 1.970 + bool hasChunk = aAddChunks.Has(iter->AddChunk()); 1.971 + // Keep the subprefix if the chunk it refers to is one 1.972 + // we haven't seen it yet. 1.973 + if (!hasChunk) { 1.974 + *subIter = *iter; 1.975 + subIter++; 1.976 + } 1.977 + } 1.978 + 1.979 + LOG(("Removed %u dead SubPrefix entries.", subEnd - subIter)); 1.980 + aSubs.SetLength(subIter - aSubs.Elements()); 1.981 +} 1.982 + 1.983 +#ifdef DEBUG 1.984 +template <class T> 1.985 +static void EnsureSorted(FallibleTArray<T>* aArray) 1.986 +{ 1.987 + T* start = aArray->Elements(); 1.988 + T* end = aArray->Elements() + aArray->Length(); 1.989 + T* iter = start; 1.990 + T* previous = start; 1.991 + 1.992 + while (iter != end) { 1.993 + previous = iter; 1.994 + ++iter; 1.995 + if (iter != end) { 1.996 + MOZ_ASSERT(iter->Compare(*previous) >= 0); 1.997 + } 1.998 + } 1.999 + 1.1000 + return; 1.1001 +} 1.1002 +#endif 1.1003 + 1.1004 +nsresult 1.1005 +HashStore::ProcessSubs() 1.1006 +{ 1.1007 +#ifdef DEBUG 1.1008 + EnsureSorted(&mAddPrefixes); 1.1009 + EnsureSorted(&mSubPrefixes); 1.1010 + EnsureSorted(&mAddCompletes); 1.1011 + EnsureSorted(&mSubCompletes); 1.1012 + LOG(("All databases seem to have a consistent sort order.")); 1.1013 +#endif 1.1014 + 1.1015 + RemoveMatchingPrefixes(mSubPrefixes, &mAddCompletes); 1.1016 + RemoveMatchingPrefixes(mSubPrefixes, &mSubCompletes); 1.1017 + 1.1018 + // Remove any remaining subbed prefixes from both addprefixes 1.1019 + // and addcompletes. 1.1020 + KnockoutSubs(&mSubPrefixes, &mAddPrefixes); 1.1021 + KnockoutSubs(&mSubCompletes, &mAddCompletes); 1.1022 + 1.1023 + // Remove any remaining subprefixes referring to addchunks that 1.1024 + // we have (and hence have been processed above). 1.1025 + RemoveDeadSubPrefixes(mSubPrefixes, mAddChunks); 1.1026 + 1.1027 +#ifdef DEBUG 1.1028 + EnsureSorted(&mAddPrefixes); 1.1029 + EnsureSorted(&mSubPrefixes); 1.1030 + EnsureSorted(&mAddCompletes); 1.1031 + EnsureSorted(&mSubCompletes); 1.1032 + LOG(("All databases seem to have a consistent sort order.")); 1.1033 +#endif 1.1034 + 1.1035 + return NS_OK; 1.1036 +} 1.1037 + 1.1038 +nsresult 1.1039 +HashStore::AugmentAdds(const nsTArray<uint32_t>& aPrefixes) 1.1040 +{ 1.1041 + uint32_t cnt = aPrefixes.Length(); 1.1042 + if (cnt != mAddPrefixes.Length()) { 1.1043 + LOG(("Amount of prefixes in cache not consistent with store (%d vs %d)", 1.1044 + aPrefixes.Length(), mAddPrefixes.Length())); 1.1045 + return NS_ERROR_FAILURE; 1.1046 + } 1.1047 + for (uint32_t i = 0; i < cnt; i++) { 1.1048 + mAddPrefixes[i].prefix.FromUint32(aPrefixes[i]); 1.1049 + } 1.1050 + return NS_OK; 1.1051 +} 1.1052 + 1.1053 +} 1.1054 +}