Fri, 16 Jan 2015 18:13:44 +0100
Integrate suggestion from review to improve consistency with existing code.
1 //* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 // Originally based on Chrome sources:
3 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include "HashStore.h"
33 #include "nsAutoPtr.h"
34 #include "nsICryptoHash.h"
35 #include "nsISeekableStream.h"
36 #include "nsIStreamConverterService.h"
37 #include "nsNetUtil.h"
38 #include "nsCheckSummedOutputStream.h"
39 #include "prlog.h"
40 #include "zlib.h"
42 // Main store for SafeBrowsing protocol data. We store
43 // known add/sub chunks, prefixes and completions in memory
44 // during an update, and serialize to disk.
45 // We do not store the add prefixes, those are retrieved by
46 // decompressing the PrefixSet cache whenever we need to apply
47 // an update.
48 //
49 // byte slicing: Many of the 4-byte values stored here are strongly
50 // correlated in the upper bytes, and uncorrelated in the lower
51 // bytes. Because zlib/DEFLATE requires match lengths of at least
52 // 3 to achieve good compression, and we don't get those if only
53 // the upper 16-bits are correlated, it is worthwhile to slice 32-bit
54 // values into 4 1-byte slices and compress the slices individually.
55 // The slices corresponding to MSBs will compress very well, and the
56 // slice corresponding to LSB almost nothing. Because of this, we
57 // only apply DEFLATE to the 3 most significant bytes, and store the
58 // LSB uncompressed.
59 //
60 // byte sliced (numValues) data format:
61 // uint32_t compressed-size
62 // compressed-size bytes zlib DEFLATE data
63 // 0...numValues byte MSB of 4-byte numValues data
64 // uint32_t compressed-size
65 // compressed-size bytes zlib DEFLATE data
66 // 0...numValues byte 2nd byte of 4-byte numValues data
67 // uint32_t compressed-size
68 // compressed-size bytes zlib DEFLATE data
69 // 0...numValues byte 3rd byte of 4-byte numValues data
70 // 0...numValues byte LSB of 4-byte numValues data
71 //
72 // Store data format:
73 // uint32_t magic
74 // uint32_t version
75 // uint32_t numAddChunks
76 // uint32_t numSubChunks
77 // uint32_t numAddPrefixes
78 // uint32_t numSubPrefixes
79 // uint32_t numAddCompletes
80 // uint32_t numSubCompletes
81 // 0...numAddChunks uint32_t addChunk
82 // 0...numSubChunks uint32_t subChunk
83 // byte sliced (numAddPrefixes) uint32_t add chunk of AddPrefixes
84 // byte sliced (numSubPrefixes) uint32_t add chunk of SubPrefixes
85 // byte sliced (numSubPrefixes) uint32_t sub chunk of SubPrefixes
86 // byte sliced (numSubPrefixes) uint32_t SubPrefixes
87 // 0...numAddCompletes 32-byte Completions + uint32_t addChunk
88 // 0...numSubCompletes 32-byte Completions + uint32_t addChunk
89 // + uint32_t subChunk
90 // 16-byte MD5 of all preceding data
92 // Name of the SafeBrowsing store
93 #define STORE_SUFFIX ".sbstore"
95 // NSPR_LOG_MODULES=UrlClassifierDbService:5
96 extern PRLogModuleInfo *gUrlClassifierDbServiceLog;
97 #if defined(PR_LOGGING)
98 #define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args)
99 #define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4)
100 #else
101 #define LOG(args)
102 #define LOG_ENABLED() (false)
103 #endif
105 // Either the return was successful or we call the Reset function (unless we
106 // hit an OOM). Used while reading in the store.
107 #define SUCCESS_OR_RESET(res) \
108 do { \
109 nsresult __rv = res; /* Don't evaluate |res| more than once */ \
110 if (__rv == NS_ERROR_OUT_OF_MEMORY) { \
111 NS_WARNING("SafeBrowsing OOM."); \
112 return __rv; \
113 } \
114 if (NS_FAILED(__rv)) { \
115 NS_WARNING("SafeBrowsing store corrupted or out of date."); \
116 Reset(); \
117 return __rv; \
118 } \
119 } while(0)
121 namespace mozilla {
122 namespace safebrowsing {
124 const uint32_t STORE_MAGIC = 0x1231af3b;
125 const uint32_t CURRENT_VERSION = 3;
127 void
128 TableUpdate::NewAddPrefix(uint32_t aAddChunk, const Prefix& aHash)
129 {
130 AddPrefix *add = mAddPrefixes.AppendElement();
131 add->addChunk = aAddChunk;
132 add->prefix = aHash;
133 }
135 void
136 TableUpdate::NewSubPrefix(uint32_t aAddChunk, const Prefix& aHash, uint32_t aSubChunk)
137 {
138 SubPrefix *sub = mSubPrefixes.AppendElement();
139 sub->addChunk = aAddChunk;
140 sub->prefix = aHash;
141 sub->subChunk = aSubChunk;
142 }
144 void
145 TableUpdate::NewAddComplete(uint32_t aAddChunk, const Completion& aHash)
146 {
147 AddComplete *add = mAddCompletes.AppendElement();
148 add->addChunk = aAddChunk;
149 add->complete = aHash;
150 }
152 void
153 TableUpdate::NewSubComplete(uint32_t aAddChunk, const Completion& aHash, uint32_t aSubChunk)
154 {
155 SubComplete *sub = mSubCompletes.AppendElement();
156 sub->addChunk = aAddChunk;
157 sub->complete = aHash;
158 sub->subChunk = aSubChunk;
159 }
162 HashStore::HashStore(const nsACString& aTableName, nsIFile* aStoreDir)
163 : mTableName(aTableName)
164 , mStoreDirectory(aStoreDir)
165 , mInUpdate(false)
166 {
167 }
169 HashStore::~HashStore()
170 {
171 }
173 nsresult
174 HashStore::Reset()
175 {
176 LOG(("HashStore resetting"));
178 nsCOMPtr<nsIFile> storeFile;
179 nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
180 NS_ENSURE_SUCCESS(rv, rv);
182 rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(STORE_SUFFIX));
183 NS_ENSURE_SUCCESS(rv, rv);
185 rv = storeFile->Remove(false);
186 NS_ENSURE_SUCCESS(rv, rv);
188 return NS_OK;
189 }
191 nsresult
192 HashStore::CheckChecksum(nsIFile* aStoreFile,
193 uint32_t aFileSize)
194 {
195 // Check for file corruption by
196 // comparing the stored checksum to actual checksum of data
197 nsAutoCString hash;
198 nsAutoCString compareHash;
199 char *data;
200 uint32_t read;
202 nsresult rv = CalculateChecksum(hash, aFileSize, true);
203 NS_ENSURE_SUCCESS(rv, rv);
205 compareHash.GetMutableData(&data, hash.Length());
207 if (hash.Length() > aFileSize) {
208 NS_WARNING("SafeBrowing file not long enough to store its hash");
209 return NS_ERROR_FAILURE;
210 }
211 nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(mInputStream);
212 rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET, aFileSize - hash.Length());
213 NS_ENSURE_SUCCESS(rv, rv);
215 rv = mInputStream->Read(data, hash.Length(), &read);
216 NS_ENSURE_SUCCESS(rv, rv);
217 NS_ASSERTION(read == hash.Length(), "Could not read hash bytes");
219 if (!hash.Equals(compareHash)) {
220 NS_WARNING("Safebrowing file failed checksum.");
221 return NS_ERROR_FAILURE;
222 }
224 return NS_OK;
225 }
227 nsresult
228 HashStore::Open()
229 {
230 nsCOMPtr<nsIFile> storeFile;
231 nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
232 NS_ENSURE_SUCCESS(rv, rv);
234 rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore"));
235 NS_ENSURE_SUCCESS(rv, rv);
237 nsCOMPtr<nsIInputStream> origStream;
238 rv = NS_NewLocalFileInputStream(getter_AddRefs(origStream), storeFile,
239 PR_RDONLY | nsIFile::OS_READAHEAD);
241 if (rv == NS_ERROR_FILE_NOT_FOUND) {
242 UpdateHeader();
243 return NS_OK;
244 } else {
245 SUCCESS_OR_RESET(rv);
246 }
248 int64_t fileSize;
249 rv = storeFile->GetFileSize(&fileSize);
250 NS_ENSURE_SUCCESS(rv, rv);
252 if (fileSize < 0 || fileSize > UINT32_MAX) {
253 return NS_ERROR_FAILURE;
254 }
256 uint32_t fileSize32 = static_cast<uint32_t>(fileSize);
258 rv = NS_NewBufferedInputStream(getter_AddRefs(mInputStream), origStream,
259 fileSize32);
260 NS_ENSURE_SUCCESS(rv, rv);
262 rv = CheckChecksum(storeFile, fileSize32);
263 SUCCESS_OR_RESET(rv);
265 rv = ReadHeader();
266 SUCCESS_OR_RESET(rv);
268 rv = SanityCheck();
269 SUCCESS_OR_RESET(rv);
271 rv = ReadChunkNumbers();
272 SUCCESS_OR_RESET(rv);
274 return NS_OK;
275 }
277 nsresult
278 HashStore::ReadHeader()
279 {
280 if (!mInputStream) {
281 UpdateHeader();
282 return NS_OK;
283 }
285 nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
286 nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
287 NS_ENSURE_SUCCESS(rv, rv);
289 void *buffer = &mHeader;
290 rv = NS_ReadInputStreamToBuffer(mInputStream,
291 &buffer,
292 sizeof(Header));
293 NS_ENSURE_SUCCESS(rv, rv);
295 return NS_OK;
296 }
298 nsresult
299 HashStore::SanityCheck()
300 {
301 if (mHeader.magic != STORE_MAGIC || mHeader.version != CURRENT_VERSION) {
302 NS_WARNING("Unexpected header data in the store.");
303 return NS_ERROR_FAILURE;
304 }
306 return NS_OK;
307 }
309 nsresult
310 HashStore::CalculateChecksum(nsAutoCString& aChecksum,
311 uint32_t aFileSize,
312 bool aChecksumPresent)
313 {
314 aChecksum.Truncate();
316 // Reset mInputStream to start
317 nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
318 nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
320 nsCOMPtr<nsICryptoHash> hash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
321 NS_ENSURE_SUCCESS(rv, rv);
323 // Size of MD5 hash in bytes
324 const uint32_t CHECKSUM_SIZE = 16;
326 // MD5 is not a secure hash function, but since this is a filesystem integrity
327 // check, this usage is ok.
328 rv = hash->Init(nsICryptoHash::MD5);
329 NS_ENSURE_SUCCESS(rv, rv);
331 if (!aChecksumPresent) {
332 // Hash entire file
333 rv = hash->UpdateFromStream(mInputStream, UINT32_MAX);
334 } else {
335 // Hash everything but last checksum bytes
336 if (aFileSize < CHECKSUM_SIZE) {
337 NS_WARNING("SafeBrowsing file isn't long enough to store its checksum");
338 return NS_ERROR_FAILURE;
339 }
340 rv = hash->UpdateFromStream(mInputStream, aFileSize - CHECKSUM_SIZE);
341 }
342 NS_ENSURE_SUCCESS(rv, rv);
344 rv = hash->Finish(false, aChecksum);
345 NS_ENSURE_SUCCESS(rv, rv);
347 return NS_OK;
348 }
350 void
351 HashStore::UpdateHeader()
352 {
353 mHeader.magic = STORE_MAGIC;
354 mHeader.version = CURRENT_VERSION;
356 mHeader.numAddChunks = mAddChunks.Length();
357 mHeader.numSubChunks = mSubChunks.Length();
358 mHeader.numAddPrefixes = mAddPrefixes.Length();
359 mHeader.numSubPrefixes = mSubPrefixes.Length();
360 mHeader.numAddCompletes = mAddCompletes.Length();
361 mHeader.numSubCompletes = mSubCompletes.Length();
362 }
364 nsresult
365 HashStore::ReadChunkNumbers()
366 {
367 NS_ENSURE_STATE(mInputStream);
369 nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
370 nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET,
371 sizeof(Header));
373 rv = mAddChunks.Read(mInputStream, mHeader.numAddChunks);
374 NS_ENSURE_SUCCESS(rv, rv);
375 NS_ASSERTION(mAddChunks.Length() == mHeader.numAddChunks, "Read the right amount of add chunks.");
377 rv = mSubChunks.Read(mInputStream, mHeader.numSubChunks);
378 NS_ENSURE_SUCCESS(rv, rv);
379 NS_ASSERTION(mSubChunks.Length() == mHeader.numSubChunks, "Read the right amount of sub chunks.");
381 return NS_OK;
382 }
384 nsresult
385 HashStore::ReadHashes()
386 {
387 if (!mInputStream) {
388 // BeginUpdate has been called but Open hasn't initialized mInputStream,
389 // because the existing HashStore is empty.
390 return NS_OK;
391 }
393 nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
395 uint32_t offset = sizeof(Header);
396 offset += (mHeader.numAddChunks + mHeader.numSubChunks) * sizeof(uint32_t);
397 nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, offset);
399 rv = ReadAddPrefixes();
400 NS_ENSURE_SUCCESS(rv, rv);
402 rv = ReadSubPrefixes();
403 NS_ENSURE_SUCCESS(rv, rv);
405 rv = ReadTArray(mInputStream, &mAddCompletes, mHeader.numAddCompletes);
406 NS_ENSURE_SUCCESS(rv, rv);
408 rv = ReadTArray(mInputStream, &mSubCompletes, mHeader.numSubCompletes);
409 NS_ENSURE_SUCCESS(rv, rv);
411 return NS_OK;
412 }
414 nsresult
415 HashStore::BeginUpdate()
416 {
417 // Read the rest of the store in memory.
418 nsresult rv = ReadHashes();
419 SUCCESS_OR_RESET(rv);
421 // Close input stream, won't be needed any more and
422 // we will rewrite ourselves.
423 if (mInputStream) {
424 rv = mInputStream->Close();
425 NS_ENSURE_SUCCESS(rv, rv);
426 }
428 mInUpdate = true;
430 return NS_OK;
431 }
433 template<class T>
434 static nsresult
435 Merge(ChunkSet* aStoreChunks,
436 FallibleTArray<T>* aStorePrefixes,
437 ChunkSet& aUpdateChunks,
438 FallibleTArray<T>& aUpdatePrefixes,
439 bool aAllowMerging = false)
440 {
441 EntrySort(aUpdatePrefixes);
443 T* updateIter = aUpdatePrefixes.Elements();
444 T* updateEnd = aUpdatePrefixes.Elements() + aUpdatePrefixes.Length();
446 T* storeIter = aStorePrefixes->Elements();
447 T* storeEnd = aStorePrefixes->Elements() + aStorePrefixes->Length();
449 // use a separate array so we can keep the iterators valid
450 // if the nsTArray grows
451 nsTArray<T> adds;
453 for (; updateIter != updateEnd; updateIter++) {
454 // skip this chunk if we already have it, unless we're
455 // merging completions, in which case we'll always already
456 // have the chunk from the original prefix
457 if (aStoreChunks->Has(updateIter->Chunk()))
458 if (!aAllowMerging)
459 continue;
460 // XXX: binary search for insertion point might be faster in common
461 // case?
462 while (storeIter < storeEnd && (storeIter->Compare(*updateIter) < 0)) {
463 // skip forward to matching element (or not...)
464 storeIter++;
465 }
466 // no match, add
467 if (storeIter == storeEnd
468 || storeIter->Compare(*updateIter) != 0) {
469 if (!adds.AppendElement(*updateIter))
470 return NS_ERROR_OUT_OF_MEMORY;
471 }
472 }
474 // Chunks can be empty, but we should still report we have them
475 // to make the chunkranges continuous.
476 aStoreChunks->Merge(aUpdateChunks);
478 aStorePrefixes->AppendElements(adds);
479 EntrySort(*aStorePrefixes);
481 return NS_OK;
482 }
484 nsresult
485 HashStore::ApplyUpdate(TableUpdate &update)
486 {
487 nsresult rv = mAddExpirations.Merge(update.AddExpirations());
488 NS_ENSURE_SUCCESS(rv, rv);
490 rv = mSubExpirations.Merge(update.SubExpirations());
491 NS_ENSURE_SUCCESS(rv, rv);
493 rv = Expire();
494 NS_ENSURE_SUCCESS(rv, rv);
496 rv = Merge(&mAddChunks, &mAddPrefixes,
497 update.AddChunks(), update.AddPrefixes());
498 NS_ENSURE_SUCCESS(rv, rv);
500 rv = Merge(&mAddChunks, &mAddCompletes,
501 update.AddChunks(), update.AddCompletes(), true);
502 NS_ENSURE_SUCCESS(rv, rv);
504 rv = Merge(&mSubChunks, &mSubPrefixes,
505 update.SubChunks(), update.SubPrefixes());
506 NS_ENSURE_SUCCESS(rv, rv);
508 rv = Merge(&mSubChunks, &mSubCompletes,
509 update.SubChunks(), update.SubCompletes(), true);
510 NS_ENSURE_SUCCESS(rv, rv);
512 return NS_OK;
513 }
515 nsresult
516 HashStore::Rebuild()
517 {
518 NS_ASSERTION(mInUpdate, "Must be in update to rebuild.");
520 nsresult rv = ProcessSubs();
521 NS_ENSURE_SUCCESS(rv, rv);
523 UpdateHeader();
525 return NS_OK;
526 }
528 void
529 HashStore::ClearCompletes()
530 {
531 NS_ASSERTION(mInUpdate, "Must be in update to clear completes.");
533 mAddCompletes.Clear();
534 mSubCompletes.Clear();
536 UpdateHeader();
537 }
539 template<class T>
540 static void
541 ExpireEntries(FallibleTArray<T>* aEntries, ChunkSet& aExpirations)
542 {
543 T* addIter = aEntries->Elements();
544 T* end = aEntries->Elements() + aEntries->Length();
546 for (T *iter = addIter; iter != end; iter++) {
547 if (!aExpirations.Has(iter->Chunk())) {
548 *addIter = *iter;
549 addIter++;
550 }
551 }
553 aEntries->SetLength(addIter - aEntries->Elements());
554 }
556 nsresult
557 HashStore::Expire()
558 {
559 ExpireEntries(&mAddPrefixes, mAddExpirations);
560 ExpireEntries(&mAddCompletes, mAddExpirations);
561 ExpireEntries(&mSubPrefixes, mSubExpirations);
562 ExpireEntries(&mSubCompletes, mSubExpirations);
564 mAddChunks.Remove(mAddExpirations);
565 mSubChunks.Remove(mSubExpirations);
567 mAddExpirations.Clear();
568 mSubExpirations.Clear();
570 return NS_OK;
571 }
573 template<class T>
574 nsresult DeflateWriteTArray(nsIOutputStream* aStream, nsTArray<T>& aIn)
575 {
576 uLongf insize = aIn.Length() * sizeof(T);
577 uLongf outsize = compressBound(insize);
578 FallibleTArray<char> outBuff;
579 if (!outBuff.SetLength(outsize)) {
580 return NS_ERROR_OUT_OF_MEMORY;
581 }
583 int zerr = compress(reinterpret_cast<Bytef*>(outBuff.Elements()),
584 &outsize,
585 reinterpret_cast<const Bytef*>(aIn.Elements()),
586 insize);
587 if (zerr != Z_OK) {
588 return NS_ERROR_FAILURE;
589 }
590 LOG(("DeflateWriteTArray: %d in %d out", insize, outsize));
592 outBuff.TruncateLength(outsize);
594 // Length of compressed data stream
595 uint32_t dataLen = outBuff.Length();
596 uint32_t written;
597 nsresult rv = aStream->Write(reinterpret_cast<char*>(&dataLen), sizeof(dataLen), &written);
598 NS_ENSURE_SUCCESS(rv, rv);
600 NS_ASSERTION(written == sizeof(dataLen), "Error writing deflate length");
602 // Store to stream
603 rv = WriteTArray(aStream, outBuff);
604 NS_ENSURE_SUCCESS(rv, rv);
606 return NS_OK;
607 }
609 template<class T>
610 nsresult InflateReadTArray(nsIInputStream* aStream, FallibleTArray<T>* aOut,
611 uint32_t aExpectedSize)
612 {
614 uint32_t inLen;
615 uint32_t read;
616 nsresult rv = aStream->Read(reinterpret_cast<char*>(&inLen), sizeof(inLen), &read);
617 NS_ENSURE_SUCCESS(rv, rv);
619 NS_ASSERTION(read == sizeof(inLen), "Error reading inflate length");
621 FallibleTArray<char> inBuff;
622 if (!inBuff.SetLength(inLen)) {
623 return NS_ERROR_OUT_OF_MEMORY;
624 }
626 rv = ReadTArray(aStream, &inBuff, inLen);
627 NS_ENSURE_SUCCESS(rv, rv);
629 uLongf insize = inLen;
630 uLongf outsize = aExpectedSize * sizeof(T);
631 if (!aOut->SetLength(aExpectedSize)) {
632 return NS_ERROR_OUT_OF_MEMORY;
633 }
635 int zerr = uncompress(reinterpret_cast<Bytef*>(aOut->Elements()),
636 &outsize,
637 reinterpret_cast<const Bytef*>(inBuff.Elements()),
638 insize);
639 if (zerr != Z_OK) {
640 return NS_ERROR_FAILURE;
641 }
642 LOG(("InflateReadTArray: %d in %d out", insize, outsize));
644 NS_ASSERTION(outsize == aExpectedSize * sizeof(T), "Decompression size mismatch");
646 return NS_OK;
647 }
649 static nsresult
650 ByteSliceWrite(nsIOutputStream* aOut, nsTArray<uint32_t>& aData)
651 {
652 nsTArray<uint8_t> slice1;
653 nsTArray<uint8_t> slice2;
654 nsTArray<uint8_t> slice3;
655 nsTArray<uint8_t> slice4;
656 uint32_t count = aData.Length();
658 slice1.SetCapacity(count);
659 slice2.SetCapacity(count);
660 slice3.SetCapacity(count);
661 slice4.SetCapacity(count);
663 for (uint32_t i = 0; i < count; i++) {
664 slice1.AppendElement( aData[i] >> 24);
665 slice2.AppendElement((aData[i] >> 16) & 0xFF);
666 slice3.AppendElement((aData[i] >> 8) & 0xFF);
667 slice4.AppendElement( aData[i] & 0xFF);
668 }
670 nsresult rv = DeflateWriteTArray(aOut, slice1);
671 NS_ENSURE_SUCCESS(rv, rv);
672 rv = DeflateWriteTArray(aOut, slice2);
673 NS_ENSURE_SUCCESS(rv, rv);
674 rv = DeflateWriteTArray(aOut, slice3);
675 NS_ENSURE_SUCCESS(rv, rv);
676 // The LSB slice is generally uncompressible, don't bother
677 // compressing it.
678 rv = WriteTArray(aOut, slice4);
679 NS_ENSURE_SUCCESS(rv, rv);
681 return NS_OK;
682 }
684 static nsresult
685 ByteSliceRead(nsIInputStream* aInStream, FallibleTArray<uint32_t>* aData, uint32_t count)
686 {
687 FallibleTArray<uint8_t> slice1;
688 FallibleTArray<uint8_t> slice2;
689 FallibleTArray<uint8_t> slice3;
690 FallibleTArray<uint8_t> slice4;
692 nsresult rv = InflateReadTArray(aInStream, &slice1, count);
693 NS_ENSURE_SUCCESS(rv, rv);
695 rv = InflateReadTArray(aInStream, &slice2, count);
696 NS_ENSURE_SUCCESS(rv, rv);
698 rv = InflateReadTArray(aInStream, &slice3, count);
699 NS_ENSURE_SUCCESS(rv, rv);
701 rv = ReadTArray(aInStream, &slice4, count);
702 NS_ENSURE_SUCCESS(rv, rv);
704 if (!aData->SetCapacity(count)) {
705 return NS_ERROR_OUT_OF_MEMORY;
706 }
708 for (uint32_t i = 0; i < count; i++) {
709 aData->AppendElement((slice1[i] << 24) | (slice2[i] << 16)
710 | (slice3[i] << 8) | (slice4[i]));
711 }
713 return NS_OK;
714 }
716 nsresult
717 HashStore::ReadAddPrefixes()
718 {
719 FallibleTArray<uint32_t> chunks;
720 uint32_t count = mHeader.numAddPrefixes;
722 nsresult rv = ByteSliceRead(mInputStream, &chunks, count);
723 NS_ENSURE_SUCCESS(rv, rv);
725 if (!mAddPrefixes.SetCapacity(count)) {
726 return NS_ERROR_OUT_OF_MEMORY;
727 }
728 for (uint32_t i = 0; i < count; i++) {
729 AddPrefix *add = mAddPrefixes.AppendElement();
730 add->prefix.FromUint32(0);
731 add->addChunk = chunks[i];
732 }
734 return NS_OK;
735 }
737 nsresult
738 HashStore::ReadSubPrefixes()
739 {
740 FallibleTArray<uint32_t> addchunks;
741 FallibleTArray<uint32_t> subchunks;
742 FallibleTArray<uint32_t> prefixes;
743 uint32_t count = mHeader.numSubPrefixes;
745 nsresult rv = ByteSliceRead(mInputStream, &addchunks, count);
746 NS_ENSURE_SUCCESS(rv, rv);
748 rv = ByteSliceRead(mInputStream, &subchunks, count);
749 NS_ENSURE_SUCCESS(rv, rv);
751 rv = ByteSliceRead(mInputStream, &prefixes, count);
752 NS_ENSURE_SUCCESS(rv, rv);
754 if (!mSubPrefixes.SetCapacity(count)) {
755 return NS_ERROR_OUT_OF_MEMORY;
756 }
757 for (uint32_t i = 0; i < count; i++) {
758 SubPrefix *sub = mSubPrefixes.AppendElement();
759 sub->addChunk = addchunks[i];
760 sub->prefix.FromUint32(prefixes[i]);
761 sub->subChunk = subchunks[i];
762 }
764 return NS_OK;
765 }
767 // Split up PrefixArray back into the constituents
768 nsresult
769 HashStore::WriteAddPrefixes(nsIOutputStream* aOut)
770 {
771 nsTArray<uint32_t> chunks;
772 uint32_t count = mAddPrefixes.Length();
773 chunks.SetCapacity(count);
775 for (uint32_t i = 0; i < count; i++) {
776 chunks.AppendElement(mAddPrefixes[i].Chunk());
777 }
779 nsresult rv = ByteSliceWrite(aOut, chunks);
780 NS_ENSURE_SUCCESS(rv, rv);
782 return NS_OK;
783 }
785 nsresult
786 HashStore::WriteSubPrefixes(nsIOutputStream* aOut)
787 {
788 nsTArray<uint32_t> addchunks;
789 nsTArray<uint32_t> subchunks;
790 nsTArray<uint32_t> prefixes;
791 uint32_t count = mSubPrefixes.Length();
792 addchunks.SetCapacity(count);
793 subchunks.SetCapacity(count);
794 prefixes.SetCapacity(count);
796 for (uint32_t i = 0; i < count; i++) {
797 addchunks.AppendElement(mSubPrefixes[i].AddChunk());
798 prefixes.AppendElement(mSubPrefixes[i].PrefixHash().ToUint32());
799 subchunks.AppendElement(mSubPrefixes[i].Chunk());
800 }
802 nsresult rv = ByteSliceWrite(aOut, addchunks);
803 NS_ENSURE_SUCCESS(rv, rv);
805 rv = ByteSliceWrite(aOut, subchunks);
806 NS_ENSURE_SUCCESS(rv, rv);
808 rv = ByteSliceWrite(aOut, prefixes);
809 NS_ENSURE_SUCCESS(rv, rv);
811 return NS_OK;
812 }
814 nsresult
815 HashStore::WriteFile()
816 {
817 NS_ASSERTION(mInUpdate, "Must be in update to write database.");
819 nsCOMPtr<nsIFile> storeFile;
820 nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
821 NS_ENSURE_SUCCESS(rv, rv);
822 rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore"));
823 NS_ENSURE_SUCCESS(rv, rv);
825 nsCOMPtr<nsIOutputStream> out;
826 rv = NS_NewCheckSummedOutputStream(getter_AddRefs(out), storeFile,
827 PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
828 NS_ENSURE_SUCCESS(rv, rv);
830 uint32_t written;
831 rv = out->Write(reinterpret_cast<char*>(&mHeader), sizeof(mHeader), &written);
832 NS_ENSURE_SUCCESS(rv, rv);
834 // Write chunk numbers.
835 rv = mAddChunks.Write(out);
836 NS_ENSURE_SUCCESS(rv, rv);
838 rv = mSubChunks.Write(out);
839 NS_ENSURE_SUCCESS(rv, rv);
841 // Write hashes.
842 rv = WriteAddPrefixes(out);
843 NS_ENSURE_SUCCESS(rv, rv);
845 rv = WriteSubPrefixes(out);
846 NS_ENSURE_SUCCESS(rv, rv);
848 rv = WriteTArray(out, mAddCompletes);
849 NS_ENSURE_SUCCESS(rv, rv);
851 rv = WriteTArray(out, mSubCompletes);
852 NS_ENSURE_SUCCESS(rv, rv);
854 nsCOMPtr<nsISafeOutputStream> safeOut = do_QueryInterface(out, &rv);
855 NS_ENSURE_SUCCESS(rv, rv);
857 rv = safeOut->Finish();
858 NS_ENSURE_SUCCESS(rv, rv);
860 return NS_OK;
861 }
863 template <class T>
864 static void
865 Erase(FallibleTArray<T>* array, T* iterStart, T* iterEnd)
866 {
867 uint32_t start = iterStart - array->Elements();
868 uint32_t count = iterEnd - iterStart;
870 if (count > 0) {
871 array->RemoveElementsAt(start, count);
872 }
873 }
875 // Find items matching between |subs| and |adds|, and remove them,
876 // recording the item from |adds| in |adds_removed|. To minimize
877 // copies, the inputs are processing in parallel, so |subs| and |adds|
878 // should be compatibly ordered (either by SBAddPrefixLess or
879 // SBAddPrefixHashLess).
880 //
881 // |predAS| provides add < sub, |predSA| provides sub < add, for the
882 // tightest compare appropriate (see calls in SBProcessSubs).
883 template<class TSub, class TAdd>
884 static void
885 KnockoutSubs(FallibleTArray<TSub>* aSubs, FallibleTArray<TAdd>* aAdds)
886 {
887 // Keep a pair of output iterators for writing kept items. Due to
888 // deletions, these may lag the main iterators. Using erase() on
889 // individual items would result in O(N^2) copies. Using a list
890 // would work around that, at double or triple the memory cost.
891 TAdd* addOut = aAdds->Elements();
892 TAdd* addIter = aAdds->Elements();
894 TSub* subOut = aSubs->Elements();
895 TSub* subIter = aSubs->Elements();
897 TAdd* addEnd = addIter + aAdds->Length();
898 TSub* subEnd = subIter + aSubs->Length();
900 while (addIter != addEnd && subIter != subEnd) {
901 // additer compare, so it compares on add chunk
902 int32_t cmp = addIter->Compare(*subIter);
903 if (cmp > 0) {
904 // If |*sub_iter| < |*add_iter|, retain the sub.
905 *subOut = *subIter;
906 ++subOut;
907 ++subIter;
908 } else if (cmp < 0) {
909 // If |*add_iter| < |*sub_iter|, retain the add.
910 *addOut = *addIter;
911 ++addOut;
912 ++addIter;
913 } else {
914 // Drop equal items
915 ++addIter;
916 ++subIter;
917 }
918 }
920 Erase(aAdds, addOut, addIter);
921 Erase(aSubs, subOut, subIter);
922 }
924 // Remove items in |removes| from |fullHashes|. |fullHashes| and
925 // |removes| should be ordered by SBAddPrefix component.
926 template <class T>
927 static void
928 RemoveMatchingPrefixes(const SubPrefixArray& aSubs, FallibleTArray<T>* aFullHashes)
929 {
930 // Where to store kept items.
931 T* out = aFullHashes->Elements();
932 T* hashIter = out;
933 T* hashEnd = aFullHashes->Elements() + aFullHashes->Length();
935 SubPrefix const * removeIter = aSubs.Elements();
936 SubPrefix const * removeEnd = aSubs.Elements() + aSubs.Length();
938 while (hashIter != hashEnd && removeIter != removeEnd) {
939 int32_t cmp = removeIter->CompareAlt(*hashIter);
940 if (cmp > 0) {
941 // Keep items less than |*removeIter|.
942 *out = *hashIter;
943 ++out;
944 ++hashIter;
945 } else if (cmp < 0) {
946 // No hit for |*removeIter|, bump it forward.
947 ++removeIter;
948 } else {
949 // Drop equal items, there may be multiple hits.
950 do {
951 ++hashIter;
952 } while (hashIter != hashEnd &&
953 !(removeIter->CompareAlt(*hashIter) < 0));
954 ++removeIter;
955 }
956 }
957 Erase(aFullHashes, out, hashIter);
958 }
960 static void
961 RemoveDeadSubPrefixes(SubPrefixArray& aSubs, ChunkSet& aAddChunks)
962 {
963 SubPrefix * subIter = aSubs.Elements();
964 SubPrefix * subEnd = aSubs.Elements() + aSubs.Length();
966 for (SubPrefix * iter = subIter; iter != subEnd; iter++) {
967 bool hasChunk = aAddChunks.Has(iter->AddChunk());
968 // Keep the subprefix if the chunk it refers to is one
969 // we haven't seen it yet.
970 if (!hasChunk) {
971 *subIter = *iter;
972 subIter++;
973 }
974 }
976 LOG(("Removed %u dead SubPrefix entries.", subEnd - subIter));
977 aSubs.SetLength(subIter - aSubs.Elements());
978 }
980 #ifdef DEBUG
981 template <class T>
982 static void EnsureSorted(FallibleTArray<T>* aArray)
983 {
984 T* start = aArray->Elements();
985 T* end = aArray->Elements() + aArray->Length();
986 T* iter = start;
987 T* previous = start;
989 while (iter != end) {
990 previous = iter;
991 ++iter;
992 if (iter != end) {
993 MOZ_ASSERT(iter->Compare(*previous) >= 0);
994 }
995 }
997 return;
998 }
999 #endif
1001 nsresult
1002 HashStore::ProcessSubs()
1003 {
1004 #ifdef DEBUG
1005 EnsureSorted(&mAddPrefixes);
1006 EnsureSorted(&mSubPrefixes);
1007 EnsureSorted(&mAddCompletes);
1008 EnsureSorted(&mSubCompletes);
1009 LOG(("All databases seem to have a consistent sort order."));
1010 #endif
1012 RemoveMatchingPrefixes(mSubPrefixes, &mAddCompletes);
1013 RemoveMatchingPrefixes(mSubPrefixes, &mSubCompletes);
1015 // Remove any remaining subbed prefixes from both addprefixes
1016 // and addcompletes.
1017 KnockoutSubs(&mSubPrefixes, &mAddPrefixes);
1018 KnockoutSubs(&mSubCompletes, &mAddCompletes);
1020 // Remove any remaining subprefixes referring to addchunks that
1021 // we have (and hence have been processed above).
1022 RemoveDeadSubPrefixes(mSubPrefixes, mAddChunks);
1024 #ifdef DEBUG
1025 EnsureSorted(&mAddPrefixes);
1026 EnsureSorted(&mSubPrefixes);
1027 EnsureSorted(&mAddCompletes);
1028 EnsureSorted(&mSubCompletes);
1029 LOG(("All databases seem to have a consistent sort order."));
1030 #endif
1032 return NS_OK;
1033 }
1035 nsresult
1036 HashStore::AugmentAdds(const nsTArray<uint32_t>& aPrefixes)
1037 {
1038 uint32_t cnt = aPrefixes.Length();
1039 if (cnt != mAddPrefixes.Length()) {
1040 LOG(("Amount of prefixes in cache not consistent with store (%d vs %d)",
1041 aPrefixes.Length(), mAddPrefixes.Length()));
1042 return NS_ERROR_FAILURE;
1043 }
1044 for (uint32_t i = 0; i < cnt; i++) {
1045 mAddPrefixes[i].prefix.FromUint32(aPrefixes[i]);
1046 }
1047 return NS_OK;
1048 }
1050 }
1051 }