toolkit/components/url-classifier/HashStore.cpp

Fri, 16 Jan 2015 18:13:44 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 18:13:44 +0100
branch
TOR_BUG_9701
changeset 14
925c144e1f1f
permissions
-rw-r--r--

Integrate suggestion from review to improve consistency with existing code.

     1 //* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 // Originally based on Chrome sources:
     3 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
     4 //
     5 // Redistribution and use in source and binary forms, with or without
     6 // modification, are permitted provided that the following conditions are
     7 // met:
     8 //
     9 //    * Redistributions of source code must retain the above copyright
    10 // notice, this list of conditions and the following disclaimer.
    11 //    * Redistributions in binary form must reproduce the above
    12 // copyright notice, this list of conditions and the following disclaimer
    13 // in the documentation and/or other materials provided with the
    14 // distribution.
    15 //    * Neither the name of Google Inc. nor the names of its
    16 // contributors may be used to endorse or promote products derived from
    17 // this software without specific prior written permission.
    18 //
    19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    32 #include "HashStore.h"
    33 #include "nsAutoPtr.h"
    34 #include "nsICryptoHash.h"
    35 #include "nsISeekableStream.h"
    36 #include "nsIStreamConverterService.h"
    37 #include "nsNetUtil.h"
    38 #include "nsCheckSummedOutputStream.h"
    39 #include "prlog.h"
    40 #include "zlib.h"
    42 // Main store for SafeBrowsing protocol data. We store
    43 // known add/sub chunks, prefixes and completions in memory
    44 // during an update, and serialize to disk.
    45 // We do not store the add prefixes, those are retrieved by
    46 // decompressing the PrefixSet cache whenever we need to apply
    47 // an update.
    48 //
    49 // byte slicing: Many of the 4-byte values stored here are strongly
    50 // correlated in the upper bytes, and uncorrelated in the lower
    51 // bytes. Because zlib/DEFLATE requires match lengths of at least
    52 // 3 to achieve good compression, and we don't get those if only
    53 // the upper 16-bits are correlated, it is worthwhile to slice 32-bit
    54 // values into 4 1-byte slices and compress the slices individually.
    55 // The slices corresponding to MSBs will compress very well, and the
    56 // slice corresponding to LSB almost nothing. Because of this, we
    57 // only apply DEFLATE to the 3 most significant bytes, and store the
    58 // LSB uncompressed.
    59 //
    60 // byte sliced (numValues) data format:
    61 //    uint32_t compressed-size
    62 //    compressed-size bytes    zlib DEFLATE data
    63 //        0...numValues        byte MSB of 4-byte numValues data
    64 //    uint32_t compressed-size
    65 //    compressed-size bytes    zlib DEFLATE data
    66 //        0...numValues        byte 2nd byte of 4-byte numValues data
    67 //    uint32_t compressed-size
    68 //    compressed-size bytes    zlib DEFLATE data
    69 //        0...numValues        byte 3rd byte of 4-byte numValues data
    70 //    0...numValues            byte LSB of 4-byte numValues data
    71 //
    72 // Store data format:
    73 //    uint32_t magic
    74 //    uint32_t version
    75 //    uint32_t numAddChunks
    76 //    uint32_t numSubChunks
    77 //    uint32_t numAddPrefixes
    78 //    uint32_t numSubPrefixes
    79 //    uint32_t numAddCompletes
    80 //    uint32_t numSubCompletes
    81 //    0...numAddChunks               uint32_t addChunk
    82 //    0...numSubChunks               uint32_t subChunk
    83 //    byte sliced (numAddPrefixes)   uint32_t add chunk of AddPrefixes
    84 //    byte sliced (numSubPrefixes)   uint32_t add chunk of SubPrefixes
    85 //    byte sliced (numSubPrefixes)   uint32_t sub chunk of SubPrefixes
    86 //    byte sliced (numSubPrefixes)   uint32_t SubPrefixes
    87 //    0...numAddCompletes            32-byte Completions + uint32_t addChunk
    88 //    0...numSubCompletes            32-byte Completions + uint32_t addChunk
    89 //                                                       + uint32_t subChunk
    90 //    16-byte MD5 of all preceding data
    92 // Name of the SafeBrowsing store
    93 #define STORE_SUFFIX ".sbstore"
    95 // NSPR_LOG_MODULES=UrlClassifierDbService:5
    96 extern PRLogModuleInfo *gUrlClassifierDbServiceLog;
    97 #if defined(PR_LOGGING)
    98 #define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args)
    99 #define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4)
   100 #else
   101 #define LOG(args)
   102 #define LOG_ENABLED() (false)
   103 #endif
   105 // Either the return was successful or we call the Reset function (unless we
   106 // hit an OOM).  Used while reading in the store.
   107 #define SUCCESS_OR_RESET(res)                                             \
   108   do {                                                                    \
   109     nsresult __rv = res; /* Don't evaluate |res| more than once */        \
   110     if (__rv == NS_ERROR_OUT_OF_MEMORY) {                                 \
   111       NS_WARNING("SafeBrowsing OOM.");                                    \
   112       return __rv;                                                        \
   113     }                                                                     \
   114     if (NS_FAILED(__rv)) {                                                \
   115       NS_WARNING("SafeBrowsing store corrupted or out of date.");         \
   116       Reset();                                                            \
   117       return __rv;                                                        \
   118     }                                                                     \
   119   } while(0)
   121 namespace mozilla {
   122 namespace safebrowsing {
   124 const uint32_t STORE_MAGIC = 0x1231af3b;
   125 const uint32_t CURRENT_VERSION = 3;
   127 void
   128 TableUpdate::NewAddPrefix(uint32_t aAddChunk, const Prefix& aHash)
   129 {
   130   AddPrefix *add = mAddPrefixes.AppendElement();
   131   add->addChunk = aAddChunk;
   132   add->prefix = aHash;
   133 }
   135 void
   136 TableUpdate::NewSubPrefix(uint32_t aAddChunk, const Prefix& aHash, uint32_t aSubChunk)
   137 {
   138   SubPrefix *sub = mSubPrefixes.AppendElement();
   139   sub->addChunk = aAddChunk;
   140   sub->prefix = aHash;
   141   sub->subChunk = aSubChunk;
   142 }
   144 void
   145 TableUpdate::NewAddComplete(uint32_t aAddChunk, const Completion& aHash)
   146 {
   147   AddComplete *add = mAddCompletes.AppendElement();
   148   add->addChunk = aAddChunk;
   149   add->complete = aHash;
   150 }
   152 void
   153 TableUpdate::NewSubComplete(uint32_t aAddChunk, const Completion& aHash, uint32_t aSubChunk)
   154 {
   155   SubComplete *sub = mSubCompletes.AppendElement();
   156   sub->addChunk = aAddChunk;
   157   sub->complete = aHash;
   158   sub->subChunk = aSubChunk;
   159 }
   162 HashStore::HashStore(const nsACString& aTableName, nsIFile* aStoreDir)
   163   : mTableName(aTableName)
   164   , mStoreDirectory(aStoreDir)
   165   , mInUpdate(false)
   166 {
   167 }
   169 HashStore::~HashStore()
   170 {
   171 }
   173 nsresult
   174 HashStore::Reset()
   175 {
   176   LOG(("HashStore resetting"));
   178   nsCOMPtr<nsIFile> storeFile;
   179   nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
   180   NS_ENSURE_SUCCESS(rv, rv);
   182   rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(STORE_SUFFIX));
   183   NS_ENSURE_SUCCESS(rv, rv);
   185   rv = storeFile->Remove(false);
   186   NS_ENSURE_SUCCESS(rv, rv);
   188   return NS_OK;
   189 }
   191 nsresult
   192 HashStore::CheckChecksum(nsIFile* aStoreFile,
   193                          uint32_t aFileSize)
   194 {
   195   // Check for file corruption by
   196   // comparing the stored checksum to actual checksum of data
   197   nsAutoCString hash;
   198   nsAutoCString compareHash;
   199   char *data;
   200   uint32_t read;
   202   nsresult rv = CalculateChecksum(hash, aFileSize, true);
   203   NS_ENSURE_SUCCESS(rv, rv);
   205   compareHash.GetMutableData(&data, hash.Length());
   207   if (hash.Length() > aFileSize) {
   208     NS_WARNING("SafeBrowing file not long enough to store its hash");
   209     return NS_ERROR_FAILURE;
   210   }
   211   nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(mInputStream);
   212   rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET, aFileSize - hash.Length());
   213   NS_ENSURE_SUCCESS(rv, rv);
   215   rv = mInputStream->Read(data, hash.Length(), &read);
   216   NS_ENSURE_SUCCESS(rv, rv);
   217   NS_ASSERTION(read == hash.Length(), "Could not read hash bytes");
   219   if (!hash.Equals(compareHash)) {
   220     NS_WARNING("Safebrowing file failed checksum.");
   221     return NS_ERROR_FAILURE;
   222   }
   224   return NS_OK;
   225 }
   227 nsresult
   228 HashStore::Open()
   229 {
   230   nsCOMPtr<nsIFile> storeFile;
   231   nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
   232   NS_ENSURE_SUCCESS(rv, rv);
   234   rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore"));
   235   NS_ENSURE_SUCCESS(rv, rv);
   237   nsCOMPtr<nsIInputStream> origStream;
   238   rv = NS_NewLocalFileInputStream(getter_AddRefs(origStream), storeFile,
   239                                   PR_RDONLY | nsIFile::OS_READAHEAD);
   241   if (rv == NS_ERROR_FILE_NOT_FOUND) {
   242     UpdateHeader();
   243     return NS_OK;
   244   } else {
   245     SUCCESS_OR_RESET(rv);
   246   }
   248   int64_t fileSize;
   249   rv = storeFile->GetFileSize(&fileSize);
   250   NS_ENSURE_SUCCESS(rv, rv);
   252   if (fileSize < 0 || fileSize > UINT32_MAX) {
   253     return NS_ERROR_FAILURE;
   254   }
   256   uint32_t fileSize32 = static_cast<uint32_t>(fileSize);
   258   rv = NS_NewBufferedInputStream(getter_AddRefs(mInputStream), origStream,
   259                                  fileSize32);
   260   NS_ENSURE_SUCCESS(rv, rv);
   262   rv = CheckChecksum(storeFile, fileSize32);
   263   SUCCESS_OR_RESET(rv);
   265   rv = ReadHeader();
   266   SUCCESS_OR_RESET(rv);
   268   rv = SanityCheck();
   269   SUCCESS_OR_RESET(rv);
   271   rv = ReadChunkNumbers();
   272   SUCCESS_OR_RESET(rv);
   274   return NS_OK;
   275 }
   277 nsresult
   278 HashStore::ReadHeader()
   279 {
   280   if (!mInputStream) {
   281     UpdateHeader();
   282     return NS_OK;
   283   }
   285   nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
   286   nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
   287   NS_ENSURE_SUCCESS(rv, rv);
   289   void *buffer = &mHeader;
   290   rv = NS_ReadInputStreamToBuffer(mInputStream,
   291                                   &buffer,
   292                                   sizeof(Header));
   293   NS_ENSURE_SUCCESS(rv, rv);
   295   return NS_OK;
   296 }
   298 nsresult
   299 HashStore::SanityCheck()
   300 {
   301   if (mHeader.magic != STORE_MAGIC || mHeader.version != CURRENT_VERSION) {
   302     NS_WARNING("Unexpected header data in the store.");
   303     return NS_ERROR_FAILURE;
   304   }
   306   return NS_OK;
   307 }
   309 nsresult
   310 HashStore::CalculateChecksum(nsAutoCString& aChecksum,
   311                              uint32_t aFileSize,
   312                              bool aChecksumPresent)
   313 {
   314   aChecksum.Truncate();
   316   // Reset mInputStream to start
   317   nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
   318   nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
   320   nsCOMPtr<nsICryptoHash> hash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
   321   NS_ENSURE_SUCCESS(rv, rv);
   323   // Size of MD5 hash in bytes
   324   const uint32_t CHECKSUM_SIZE = 16;
   326   // MD5 is not a secure hash function, but since this is a filesystem integrity
   327   // check, this usage is ok.
   328   rv = hash->Init(nsICryptoHash::MD5);
   329   NS_ENSURE_SUCCESS(rv, rv);
   331   if (!aChecksumPresent) {
   332     // Hash entire file
   333     rv = hash->UpdateFromStream(mInputStream, UINT32_MAX);
   334   } else {
   335     // Hash everything but last checksum bytes
   336     if (aFileSize < CHECKSUM_SIZE) {
   337       NS_WARNING("SafeBrowsing file isn't long enough to store its checksum");
   338       return NS_ERROR_FAILURE;
   339     }
   340     rv = hash->UpdateFromStream(mInputStream, aFileSize - CHECKSUM_SIZE);
   341   }
   342   NS_ENSURE_SUCCESS(rv, rv);
   344   rv = hash->Finish(false, aChecksum);
   345   NS_ENSURE_SUCCESS(rv, rv);
   347   return NS_OK;
   348 }
   350 void
   351 HashStore::UpdateHeader()
   352 {
   353   mHeader.magic = STORE_MAGIC;
   354   mHeader.version = CURRENT_VERSION;
   356   mHeader.numAddChunks = mAddChunks.Length();
   357   mHeader.numSubChunks = mSubChunks.Length();
   358   mHeader.numAddPrefixes = mAddPrefixes.Length();
   359   mHeader.numSubPrefixes = mSubPrefixes.Length();
   360   mHeader.numAddCompletes = mAddCompletes.Length();
   361   mHeader.numSubCompletes = mSubCompletes.Length();
   362 }
   364 nsresult
   365 HashStore::ReadChunkNumbers()
   366 {
   367   NS_ENSURE_STATE(mInputStream);
   369   nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
   370   nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET,
   371                                sizeof(Header));
   373   rv = mAddChunks.Read(mInputStream, mHeader.numAddChunks);
   374   NS_ENSURE_SUCCESS(rv, rv);
   375   NS_ASSERTION(mAddChunks.Length() == mHeader.numAddChunks, "Read the right amount of add chunks.");
   377   rv = mSubChunks.Read(mInputStream, mHeader.numSubChunks);
   378   NS_ENSURE_SUCCESS(rv, rv);
   379   NS_ASSERTION(mSubChunks.Length() == mHeader.numSubChunks, "Read the right amount of sub chunks.");
   381   return NS_OK;
   382 }
   384 nsresult
   385 HashStore::ReadHashes()
   386 {
   387   if (!mInputStream) {
   388     // BeginUpdate has been called but Open hasn't initialized mInputStream,
   389     // because the existing HashStore is empty.
   390     return NS_OK;
   391   }
   393   nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
   395   uint32_t offset = sizeof(Header);
   396   offset += (mHeader.numAddChunks + mHeader.numSubChunks) * sizeof(uint32_t);
   397   nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, offset);
   399   rv = ReadAddPrefixes();
   400   NS_ENSURE_SUCCESS(rv, rv);
   402   rv = ReadSubPrefixes();
   403   NS_ENSURE_SUCCESS(rv, rv);
   405   rv = ReadTArray(mInputStream, &mAddCompletes, mHeader.numAddCompletes);
   406   NS_ENSURE_SUCCESS(rv, rv);
   408   rv = ReadTArray(mInputStream, &mSubCompletes, mHeader.numSubCompletes);
   409   NS_ENSURE_SUCCESS(rv, rv);
   411   return NS_OK;
   412 }
   414 nsresult
   415 HashStore::BeginUpdate()
   416 {
   417   // Read the rest of the store in memory.
   418   nsresult rv = ReadHashes();
   419   SUCCESS_OR_RESET(rv);
   421   // Close input stream, won't be needed any more and
   422   // we will rewrite ourselves.
   423   if (mInputStream) {
   424     rv = mInputStream->Close();
   425     NS_ENSURE_SUCCESS(rv, rv);
   426   }
   428   mInUpdate = true;
   430   return NS_OK;
   431 }
   433 template<class T>
   434 static nsresult
   435 Merge(ChunkSet* aStoreChunks,
   436       FallibleTArray<T>* aStorePrefixes,
   437       ChunkSet& aUpdateChunks,
   438       FallibleTArray<T>& aUpdatePrefixes,
   439       bool aAllowMerging = false)
   440 {
   441   EntrySort(aUpdatePrefixes);
   443   T* updateIter = aUpdatePrefixes.Elements();
   444   T* updateEnd = aUpdatePrefixes.Elements() + aUpdatePrefixes.Length();
   446   T* storeIter = aStorePrefixes->Elements();
   447   T* storeEnd = aStorePrefixes->Elements() + aStorePrefixes->Length();
   449   // use a separate array so we can keep the iterators valid
   450   // if the nsTArray grows
   451   nsTArray<T> adds;
   453   for (; updateIter != updateEnd; updateIter++) {
   454     // skip this chunk if we already have it, unless we're
   455     // merging completions, in which case we'll always already
   456     // have the chunk from the original prefix
   457     if (aStoreChunks->Has(updateIter->Chunk()))
   458       if (!aAllowMerging)
   459         continue;
   460     // XXX: binary search for insertion point might be faster in common
   461     // case?
   462     while (storeIter < storeEnd && (storeIter->Compare(*updateIter) < 0)) {
   463       // skip forward to matching element (or not...)
   464       storeIter++;
   465     }
   466     // no match, add
   467     if (storeIter == storeEnd
   468         || storeIter->Compare(*updateIter) != 0) {
   469       if (!adds.AppendElement(*updateIter))
   470         return NS_ERROR_OUT_OF_MEMORY;
   471     }
   472   }
   474   // Chunks can be empty, but we should still report we have them
   475   // to make the chunkranges continuous.
   476   aStoreChunks->Merge(aUpdateChunks);
   478   aStorePrefixes->AppendElements(adds);
   479   EntrySort(*aStorePrefixes);
   481   return NS_OK;
   482 }
   484 nsresult
   485 HashStore::ApplyUpdate(TableUpdate &update)
   486 {
   487   nsresult rv = mAddExpirations.Merge(update.AddExpirations());
   488   NS_ENSURE_SUCCESS(rv, rv);
   490   rv = mSubExpirations.Merge(update.SubExpirations());
   491   NS_ENSURE_SUCCESS(rv, rv);
   493   rv = Expire();
   494   NS_ENSURE_SUCCESS(rv, rv);
   496   rv = Merge(&mAddChunks, &mAddPrefixes,
   497              update.AddChunks(), update.AddPrefixes());
   498   NS_ENSURE_SUCCESS(rv, rv);
   500   rv = Merge(&mAddChunks, &mAddCompletes,
   501              update.AddChunks(), update.AddCompletes(), true);
   502   NS_ENSURE_SUCCESS(rv, rv);
   504   rv = Merge(&mSubChunks, &mSubPrefixes,
   505              update.SubChunks(), update.SubPrefixes());
   506   NS_ENSURE_SUCCESS(rv, rv);
   508   rv = Merge(&mSubChunks, &mSubCompletes,
   509              update.SubChunks(), update.SubCompletes(), true);
   510   NS_ENSURE_SUCCESS(rv, rv);
   512   return NS_OK;
   513 }
   515 nsresult
   516 HashStore::Rebuild()
   517 {
   518   NS_ASSERTION(mInUpdate, "Must be in update to rebuild.");
   520   nsresult rv = ProcessSubs();
   521   NS_ENSURE_SUCCESS(rv, rv);
   523   UpdateHeader();
   525   return NS_OK;
   526 }
   528 void
   529 HashStore::ClearCompletes()
   530 {
   531   NS_ASSERTION(mInUpdate, "Must be in update to clear completes.");
   533   mAddCompletes.Clear();
   534   mSubCompletes.Clear();
   536   UpdateHeader();
   537 }
   539 template<class T>
   540 static void
   541 ExpireEntries(FallibleTArray<T>* aEntries, ChunkSet& aExpirations)
   542 {
   543   T* addIter = aEntries->Elements();
   544   T* end = aEntries->Elements() + aEntries->Length();
   546   for (T *iter = addIter; iter != end; iter++) {
   547     if (!aExpirations.Has(iter->Chunk())) {
   548       *addIter = *iter;
   549       addIter++;
   550     }
   551   }
   553   aEntries->SetLength(addIter - aEntries->Elements());
   554 }
   556 nsresult
   557 HashStore::Expire()
   558 {
   559   ExpireEntries(&mAddPrefixes, mAddExpirations);
   560   ExpireEntries(&mAddCompletes, mAddExpirations);
   561   ExpireEntries(&mSubPrefixes, mSubExpirations);
   562   ExpireEntries(&mSubCompletes, mSubExpirations);
   564   mAddChunks.Remove(mAddExpirations);
   565   mSubChunks.Remove(mSubExpirations);
   567   mAddExpirations.Clear();
   568   mSubExpirations.Clear();
   570   return NS_OK;
   571 }
   573 template<class T>
   574 nsresult DeflateWriteTArray(nsIOutputStream* aStream, nsTArray<T>& aIn)
   575 {
   576   uLongf insize = aIn.Length() * sizeof(T);
   577   uLongf outsize = compressBound(insize);
   578   FallibleTArray<char> outBuff;
   579   if (!outBuff.SetLength(outsize)) {
   580     return NS_ERROR_OUT_OF_MEMORY;
   581   }
   583   int zerr = compress(reinterpret_cast<Bytef*>(outBuff.Elements()),
   584                       &outsize,
   585                       reinterpret_cast<const Bytef*>(aIn.Elements()),
   586                       insize);
   587   if (zerr != Z_OK) {
   588     return NS_ERROR_FAILURE;
   589   }
   590   LOG(("DeflateWriteTArray: %d in %d out", insize, outsize));
   592   outBuff.TruncateLength(outsize);
   594   // Length of compressed data stream
   595   uint32_t dataLen = outBuff.Length();
   596   uint32_t written;
   597   nsresult rv = aStream->Write(reinterpret_cast<char*>(&dataLen), sizeof(dataLen), &written);
   598   NS_ENSURE_SUCCESS(rv, rv);
   600   NS_ASSERTION(written == sizeof(dataLen), "Error writing deflate length");
   602   // Store to stream
   603   rv = WriteTArray(aStream, outBuff);
   604   NS_ENSURE_SUCCESS(rv, rv);
   606   return NS_OK;
   607 }
   609 template<class T>
   610 nsresult InflateReadTArray(nsIInputStream* aStream, FallibleTArray<T>* aOut,
   611                            uint32_t aExpectedSize)
   612 {
   614   uint32_t inLen;
   615   uint32_t read;
   616   nsresult rv = aStream->Read(reinterpret_cast<char*>(&inLen), sizeof(inLen), &read);
   617   NS_ENSURE_SUCCESS(rv, rv);
   619   NS_ASSERTION(read == sizeof(inLen), "Error reading inflate length");
   621   FallibleTArray<char> inBuff;
   622   if (!inBuff.SetLength(inLen)) {
   623     return NS_ERROR_OUT_OF_MEMORY;
   624   }
   626   rv = ReadTArray(aStream, &inBuff, inLen);
   627   NS_ENSURE_SUCCESS(rv, rv);
   629   uLongf insize = inLen;
   630   uLongf outsize = aExpectedSize * sizeof(T);
   631   if (!aOut->SetLength(aExpectedSize)) {
   632     return NS_ERROR_OUT_OF_MEMORY;
   633   }
   635   int zerr = uncompress(reinterpret_cast<Bytef*>(aOut->Elements()),
   636                         &outsize,
   637                         reinterpret_cast<const Bytef*>(inBuff.Elements()),
   638                         insize);
   639   if (zerr != Z_OK) {
   640     return NS_ERROR_FAILURE;
   641   }
   642   LOG(("InflateReadTArray: %d in %d out", insize, outsize));
   644   NS_ASSERTION(outsize == aExpectedSize * sizeof(T), "Decompression size mismatch");
   646   return NS_OK;
   647 }
   649 static nsresult
   650 ByteSliceWrite(nsIOutputStream* aOut, nsTArray<uint32_t>& aData)
   651 {
   652   nsTArray<uint8_t> slice1;
   653   nsTArray<uint8_t> slice2;
   654   nsTArray<uint8_t> slice3;
   655   nsTArray<uint8_t> slice4;
   656   uint32_t count = aData.Length();
   658   slice1.SetCapacity(count);
   659   slice2.SetCapacity(count);
   660   slice3.SetCapacity(count);
   661   slice4.SetCapacity(count);
   663   for (uint32_t i = 0; i < count; i++) {
   664     slice1.AppendElement( aData[i] >> 24);
   665     slice2.AppendElement((aData[i] >> 16) & 0xFF);
   666     slice3.AppendElement((aData[i] >>  8) & 0xFF);
   667     slice4.AppendElement( aData[i]        & 0xFF);
   668   }
   670   nsresult rv = DeflateWriteTArray(aOut, slice1);
   671   NS_ENSURE_SUCCESS(rv, rv);
   672   rv = DeflateWriteTArray(aOut, slice2);
   673   NS_ENSURE_SUCCESS(rv, rv);
   674   rv = DeflateWriteTArray(aOut, slice3);
   675   NS_ENSURE_SUCCESS(rv, rv);
   676   // The LSB slice is generally uncompressible, don't bother
   677   // compressing it.
   678   rv = WriteTArray(aOut, slice4);
   679   NS_ENSURE_SUCCESS(rv, rv);
   681   return NS_OK;
   682 }
   684 static nsresult
   685 ByteSliceRead(nsIInputStream* aInStream, FallibleTArray<uint32_t>* aData, uint32_t count)
   686 {
   687   FallibleTArray<uint8_t> slice1;
   688   FallibleTArray<uint8_t> slice2;
   689   FallibleTArray<uint8_t> slice3;
   690   FallibleTArray<uint8_t> slice4;
   692   nsresult rv = InflateReadTArray(aInStream, &slice1, count);
   693   NS_ENSURE_SUCCESS(rv, rv);
   695   rv = InflateReadTArray(aInStream, &slice2, count);
   696   NS_ENSURE_SUCCESS(rv, rv);
   698   rv = InflateReadTArray(aInStream, &slice3, count);
   699   NS_ENSURE_SUCCESS(rv, rv);
   701   rv = ReadTArray(aInStream, &slice4, count);
   702   NS_ENSURE_SUCCESS(rv, rv);
   704   if (!aData->SetCapacity(count)) {
   705     return NS_ERROR_OUT_OF_MEMORY;
   706   }
   708   for (uint32_t i = 0; i < count; i++) {
   709     aData->AppendElement((slice1[i] << 24) | (slice2[i] << 16)
   710                          | (slice3[i] << 8) | (slice4[i]));
   711   }
   713   return NS_OK;
   714 }
   716 nsresult
   717 HashStore::ReadAddPrefixes()
   718 {
   719   FallibleTArray<uint32_t> chunks;
   720   uint32_t count = mHeader.numAddPrefixes;
   722   nsresult rv = ByteSliceRead(mInputStream, &chunks, count);
   723   NS_ENSURE_SUCCESS(rv, rv);
   725   if (!mAddPrefixes.SetCapacity(count)) {
   726     return NS_ERROR_OUT_OF_MEMORY;
   727   }
   728   for (uint32_t i = 0; i < count; i++) {
   729     AddPrefix *add = mAddPrefixes.AppendElement();
   730     add->prefix.FromUint32(0);
   731     add->addChunk = chunks[i];
   732   }
   734   return NS_OK;
   735 }
   737 nsresult
   738 HashStore::ReadSubPrefixes()
   739 {
   740   FallibleTArray<uint32_t> addchunks;
   741   FallibleTArray<uint32_t> subchunks;
   742   FallibleTArray<uint32_t> prefixes;
   743   uint32_t count = mHeader.numSubPrefixes;
   745   nsresult rv = ByteSliceRead(mInputStream, &addchunks, count);
   746   NS_ENSURE_SUCCESS(rv, rv);
   748   rv = ByteSliceRead(mInputStream, &subchunks, count);
   749   NS_ENSURE_SUCCESS(rv, rv);
   751   rv = ByteSliceRead(mInputStream, &prefixes, count);
   752   NS_ENSURE_SUCCESS(rv, rv);
   754   if (!mSubPrefixes.SetCapacity(count)) {
   755     return NS_ERROR_OUT_OF_MEMORY;
   756   }
   757   for (uint32_t i = 0; i < count; i++) {
   758     SubPrefix *sub = mSubPrefixes.AppendElement();
   759     sub->addChunk = addchunks[i];
   760     sub->prefix.FromUint32(prefixes[i]);
   761     sub->subChunk = subchunks[i];
   762   }
   764   return NS_OK;
   765 }
   767 // Split up PrefixArray back into the constituents
   768 nsresult
   769 HashStore::WriteAddPrefixes(nsIOutputStream* aOut)
   770 {
   771   nsTArray<uint32_t> chunks;
   772   uint32_t count = mAddPrefixes.Length();
   773   chunks.SetCapacity(count);
   775   for (uint32_t i = 0; i < count; i++) {
   776     chunks.AppendElement(mAddPrefixes[i].Chunk());
   777   }
   779   nsresult rv = ByteSliceWrite(aOut, chunks);
   780   NS_ENSURE_SUCCESS(rv, rv);
   782   return NS_OK;
   783 }
   785 nsresult
   786 HashStore::WriteSubPrefixes(nsIOutputStream* aOut)
   787 {
   788   nsTArray<uint32_t> addchunks;
   789   nsTArray<uint32_t> subchunks;
   790   nsTArray<uint32_t> prefixes;
   791   uint32_t count = mSubPrefixes.Length();
   792   addchunks.SetCapacity(count);
   793   subchunks.SetCapacity(count);
   794   prefixes.SetCapacity(count);
   796   for (uint32_t i = 0; i < count; i++) {
   797     addchunks.AppendElement(mSubPrefixes[i].AddChunk());
   798     prefixes.AppendElement(mSubPrefixes[i].PrefixHash().ToUint32());
   799     subchunks.AppendElement(mSubPrefixes[i].Chunk());
   800   }
   802   nsresult rv = ByteSliceWrite(aOut, addchunks);
   803   NS_ENSURE_SUCCESS(rv, rv);
   805   rv = ByteSliceWrite(aOut, subchunks);
   806   NS_ENSURE_SUCCESS(rv, rv);
   808   rv = ByteSliceWrite(aOut, prefixes);
   809   NS_ENSURE_SUCCESS(rv, rv);
   811   return NS_OK;
   812 }
   814 nsresult
   815 HashStore::WriteFile()
   816 {
   817   NS_ASSERTION(mInUpdate, "Must be in update to write database.");
   819   nsCOMPtr<nsIFile> storeFile;
   820   nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
   821   NS_ENSURE_SUCCESS(rv, rv);
   822   rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore"));
   823   NS_ENSURE_SUCCESS(rv, rv);
   825   nsCOMPtr<nsIOutputStream> out;
   826   rv = NS_NewCheckSummedOutputStream(getter_AddRefs(out), storeFile,
   827                                      PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
   828   NS_ENSURE_SUCCESS(rv, rv);
   830   uint32_t written;
   831   rv = out->Write(reinterpret_cast<char*>(&mHeader), sizeof(mHeader), &written);
   832   NS_ENSURE_SUCCESS(rv, rv);
   834   // Write chunk numbers.
   835   rv = mAddChunks.Write(out);
   836   NS_ENSURE_SUCCESS(rv, rv);
   838   rv = mSubChunks.Write(out);
   839   NS_ENSURE_SUCCESS(rv, rv);
   841   // Write hashes.
   842   rv = WriteAddPrefixes(out);
   843   NS_ENSURE_SUCCESS(rv, rv);
   845   rv = WriteSubPrefixes(out);
   846   NS_ENSURE_SUCCESS(rv, rv);
   848   rv = WriteTArray(out, mAddCompletes);
   849   NS_ENSURE_SUCCESS(rv, rv);
   851   rv = WriteTArray(out, mSubCompletes);
   852   NS_ENSURE_SUCCESS(rv, rv);
   854   nsCOMPtr<nsISafeOutputStream> safeOut = do_QueryInterface(out, &rv);
   855   NS_ENSURE_SUCCESS(rv, rv);
   857   rv = safeOut->Finish();
   858   NS_ENSURE_SUCCESS(rv, rv);
   860   return NS_OK;
   861 }
   863 template <class T>
   864 static void
   865 Erase(FallibleTArray<T>* array, T* iterStart, T* iterEnd)
   866 {
   867   uint32_t start = iterStart - array->Elements();
   868   uint32_t count = iterEnd - iterStart;
   870   if (count > 0) {
   871     array->RemoveElementsAt(start, count);
   872   }
   873 }
   875 // Find items matching between |subs| and |adds|, and remove them,
   876 // recording the item from |adds| in |adds_removed|.  To minimize
   877 // copies, the inputs are processing in parallel, so |subs| and |adds|
   878 // should be compatibly ordered (either by SBAddPrefixLess or
   879 // SBAddPrefixHashLess).
   880 //
   881 // |predAS| provides add < sub, |predSA| provides sub < add, for the
   882 // tightest compare appropriate (see calls in SBProcessSubs).
   883 template<class TSub, class TAdd>
   884 static void
   885 KnockoutSubs(FallibleTArray<TSub>* aSubs, FallibleTArray<TAdd>* aAdds)
   886 {
   887   // Keep a pair of output iterators for writing kept items.  Due to
   888   // deletions, these may lag the main iterators.  Using erase() on
   889   // individual items would result in O(N^2) copies.  Using a list
   890   // would work around that, at double or triple the memory cost.
   891   TAdd* addOut = aAdds->Elements();
   892   TAdd* addIter = aAdds->Elements();
   894   TSub* subOut = aSubs->Elements();
   895   TSub* subIter = aSubs->Elements();
   897   TAdd* addEnd = addIter + aAdds->Length();
   898   TSub* subEnd = subIter + aSubs->Length();
   900   while (addIter != addEnd && subIter != subEnd) {
   901     // additer compare, so it compares on add chunk
   902     int32_t cmp = addIter->Compare(*subIter);
   903     if (cmp > 0) {
   904       // If |*sub_iter| < |*add_iter|, retain the sub.
   905       *subOut = *subIter;
   906       ++subOut;
   907       ++subIter;
   908     } else if (cmp < 0) {
   909       // If |*add_iter| < |*sub_iter|, retain the add.
   910       *addOut = *addIter;
   911       ++addOut;
   912       ++addIter;
   913     } else {
   914       // Drop equal items
   915       ++addIter;
   916       ++subIter;
   917     }
   918   }
   920   Erase(aAdds, addOut, addIter);
   921   Erase(aSubs, subOut, subIter);
   922 }
   924 // Remove items in |removes| from |fullHashes|.  |fullHashes| and
   925 // |removes| should be ordered by SBAddPrefix component.
   926 template <class T>
   927 static void
   928 RemoveMatchingPrefixes(const SubPrefixArray& aSubs, FallibleTArray<T>* aFullHashes)
   929 {
   930   // Where to store kept items.
   931   T* out = aFullHashes->Elements();
   932   T* hashIter = out;
   933   T* hashEnd = aFullHashes->Elements() + aFullHashes->Length();
   935   SubPrefix const * removeIter = aSubs.Elements();
   936   SubPrefix const * removeEnd = aSubs.Elements() + aSubs.Length();
   938   while (hashIter != hashEnd && removeIter != removeEnd) {
   939     int32_t cmp = removeIter->CompareAlt(*hashIter);
   940     if (cmp > 0) {
   941       // Keep items less than |*removeIter|.
   942       *out = *hashIter;
   943       ++out;
   944       ++hashIter;
   945     } else if (cmp < 0) {
   946       // No hit for |*removeIter|, bump it forward.
   947       ++removeIter;
   948     } else {
   949       // Drop equal items, there may be multiple hits.
   950       do {
   951         ++hashIter;
   952       } while (hashIter != hashEnd &&
   953                !(removeIter->CompareAlt(*hashIter) < 0));
   954       ++removeIter;
   955     }
   956   }
   957   Erase(aFullHashes, out, hashIter);
   958 }
   960 static void
   961 RemoveDeadSubPrefixes(SubPrefixArray& aSubs, ChunkSet& aAddChunks)
   962 {
   963   SubPrefix * subIter = aSubs.Elements();
   964   SubPrefix * subEnd = aSubs.Elements() + aSubs.Length();
   966   for (SubPrefix * iter = subIter; iter != subEnd; iter++) {
   967     bool hasChunk = aAddChunks.Has(iter->AddChunk());
   968     // Keep the subprefix if the chunk it refers to is one
   969     // we haven't seen it yet.
   970     if (!hasChunk) {
   971       *subIter = *iter;
   972       subIter++;
   973     }
   974   }
   976   LOG(("Removed %u dead SubPrefix entries.", subEnd - subIter));
   977   aSubs.SetLength(subIter - aSubs.Elements());
   978 }
   980 #ifdef DEBUG
   981 template <class T>
   982 static void EnsureSorted(FallibleTArray<T>* aArray)
   983 {
   984   T* start = aArray->Elements();
   985   T* end = aArray->Elements() + aArray->Length();
   986   T* iter = start;
   987   T* previous = start;
   989   while (iter != end) {
   990     previous = iter;
   991     ++iter;
   992     if (iter != end) {
   993       MOZ_ASSERT(iter->Compare(*previous) >= 0);
   994     }
   995   }
   997   return;
   998 }
   999 #endif
  1001 nsresult
  1002 HashStore::ProcessSubs()
  1004 #ifdef DEBUG
  1005   EnsureSorted(&mAddPrefixes);
  1006   EnsureSorted(&mSubPrefixes);
  1007   EnsureSorted(&mAddCompletes);
  1008   EnsureSorted(&mSubCompletes);
  1009   LOG(("All databases seem to have a consistent sort order."));
  1010 #endif
  1012   RemoveMatchingPrefixes(mSubPrefixes, &mAddCompletes);
  1013   RemoveMatchingPrefixes(mSubPrefixes, &mSubCompletes);
  1015   // Remove any remaining subbed prefixes from both addprefixes
  1016   // and addcompletes.
  1017   KnockoutSubs(&mSubPrefixes, &mAddPrefixes);
  1018   KnockoutSubs(&mSubCompletes, &mAddCompletes);
  1020   // Remove any remaining subprefixes referring to addchunks that
  1021   // we have (and hence have been processed above).
  1022   RemoveDeadSubPrefixes(mSubPrefixes, mAddChunks);
  1024 #ifdef DEBUG
  1025   EnsureSorted(&mAddPrefixes);
  1026   EnsureSorted(&mSubPrefixes);
  1027   EnsureSorted(&mAddCompletes);
  1028   EnsureSorted(&mSubCompletes);
  1029   LOG(("All databases seem to have a consistent sort order."));
  1030 #endif
  1032   return NS_OK;
  1035 nsresult
  1036 HashStore::AugmentAdds(const nsTArray<uint32_t>& aPrefixes)
  1038   uint32_t cnt = aPrefixes.Length();
  1039   if (cnt != mAddPrefixes.Length()) {
  1040     LOG(("Amount of prefixes in cache not consistent with store (%d vs %d)",
  1041          aPrefixes.Length(), mAddPrefixes.Length()));
  1042     return NS_ERROR_FAILURE;
  1044   for (uint32_t i = 0; i < cnt; i++) {
  1045     mAddPrefixes[i].prefix.FromUint32(aPrefixes[i]);
  1047   return NS_OK;

mercurial