|
1 // Copyright 2008 Google Inc. All Rights Reserved. |
|
2 // |
|
3 // Redistribution and use in source and binary forms, with or without |
|
4 // modification, are permitted provided that the following conditions are |
|
5 // met: |
|
6 // |
|
7 // * Redistributions of source code must retain the above copyright |
|
8 // notice, this list of conditions and the following disclaimer. |
|
9 // * Redistributions in binary form must reproduce the above |
|
10 // copyright notice, this list of conditions and the following disclaimer |
|
11 // in the documentation and/or other materials provided with the |
|
12 // distribution. |
|
13 // * Neither the name of Google Inc. nor the names of its |
|
14 // contributors may be used to endorse or promote products derived from |
|
15 // this software without specific prior written permission. |
|
16 // |
|
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
28 // |
|
29 // Internals shared between the Snappy implementation and its unittest. |
|
30 |
|
31 #ifndef UTIL_SNAPPY_SNAPPY_INTERNAL_H_ |
|
32 #define UTIL_SNAPPY_SNAPPY_INTERNAL_H_ |
|
33 |
|
34 #include "snappy-stubs-internal.h" |
|
35 |
|
36 namespace snappy { |
|
37 namespace internal { |
|
38 |
|
39 class WorkingMemory { |
|
40 public: |
|
41 WorkingMemory() : large_table_(NULL) { } |
|
42 ~WorkingMemory() { delete[] large_table_; } |
|
43 |
|
44 // Allocates and clears a hash table using memory in "*this", |
|
45 // stores the number of buckets in "*table_size" and returns a pointer to |
|
46 // the base of the hash table. |
|
47 uint16* GetHashTable(size_t input_size, int* table_size); |
|
48 |
|
49 private: |
|
50 uint16 small_table_[1<<10]; // 2KB |
|
51 uint16* large_table_; // Allocated only when needed |
|
52 |
|
53 DISALLOW_COPY_AND_ASSIGN(WorkingMemory); |
|
54 }; |
|
55 |
|
56 // Flat array compression that does not emit the "uncompressed length" |
|
57 // prefix. Compresses "input" string to the "*op" buffer. |
|
58 // |
|
59 // REQUIRES: "input_length <= kBlockSize" |
|
60 // REQUIRES: "op" points to an array of memory that is at least |
|
61 // "MaxCompressedLength(input_length)" in size. |
|
62 // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. |
|
63 // REQUIRES: "table_size" is a power of two |
|
64 // |
|
65 // Returns an "end" pointer into "op" buffer. |
|
66 // "end - op" is the compressed size of "input". |
|
67 char* CompressFragment(const char* input, |
|
68 size_t input_length, |
|
69 char* op, |
|
70 uint16* table, |
|
71 const int table_size); |
|
72 |
|
73 // Return the largest n such that |
|
74 // |
|
75 // s1[0,n-1] == s2[0,n-1] |
|
76 // and n <= (s2_limit - s2). |
|
77 // |
|
78 // Does not read *s2_limit or beyond. |
|
79 // Does not read *(s1 + (s2_limit - s2)) or beyond. |
|
80 // Requires that s2_limit >= s2. |
|
81 // |
|
82 // Separate implementation for x86_64, for speed. Uses the fact that |
|
83 // x86_64 is little endian. |
|
84 #if defined(ARCH_K8) |
|
85 static inline int FindMatchLength(const char* s1, |
|
86 const char* s2, |
|
87 const char* s2_limit) { |
|
88 DCHECK_GE(s2_limit, s2); |
|
89 int matched = 0; |
|
90 |
|
91 // Find out how long the match is. We loop over the data 64 bits at a |
|
92 // time until we find a 64-bit block that doesn't match; then we find |
|
93 // the first non-matching bit and use that to calculate the total |
|
94 // length of the match. |
|
95 while (PREDICT_TRUE(s2 <= s2_limit - 8)) { |
|
96 if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) { |
|
97 s2 += 8; |
|
98 matched += 8; |
|
99 } else { |
|
100 // On current (mid-2008) Opteron models there is a 3% more |
|
101 // efficient code sequence to find the first non-matching byte. |
|
102 // However, what follows is ~10% better on Intel Core 2 and newer, |
|
103 // and we expect AMD's bsf instruction to improve. |
|
104 uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); |
|
105 int matching_bits = Bits::FindLSBSetNonZero64(x); |
|
106 matched += matching_bits >> 3; |
|
107 return matched; |
|
108 } |
|
109 } |
|
110 while (PREDICT_TRUE(s2 < s2_limit)) { |
|
111 if (PREDICT_TRUE(s1[matched] == *s2)) { |
|
112 ++s2; |
|
113 ++matched; |
|
114 } else { |
|
115 return matched; |
|
116 } |
|
117 } |
|
118 return matched; |
|
119 } |
|
120 #else |
|
121 static inline int FindMatchLength(const char* s1, |
|
122 const char* s2, |
|
123 const char* s2_limit) { |
|
124 // Implementation based on the x86-64 version, above. |
|
125 DCHECK_GE(s2_limit, s2); |
|
126 int matched = 0; |
|
127 |
|
128 while (s2 <= s2_limit - 4 && |
|
129 UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { |
|
130 s2 += 4; |
|
131 matched += 4; |
|
132 } |
|
133 if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { |
|
134 uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); |
|
135 int matching_bits = Bits::FindLSBSetNonZero(x); |
|
136 matched += matching_bits >> 3; |
|
137 } else { |
|
138 while ((s2 < s2_limit) && (s1[matched] == *s2)) { |
|
139 ++s2; |
|
140 ++matched; |
|
141 } |
|
142 } |
|
143 return matched; |
|
144 } |
|
145 #endif |
|
146 |
|
147 } // end namespace internal |
|
148 } // end namespace snappy |
|
149 |
|
150 #endif // UTIL_SNAPPY_SNAPPY_INTERNAL_H_ |