|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- |
|
2 * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ : |
|
3 * This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #include "mozilla/ArrayUtils.h" |
|
8 |
|
9 #include "mozStorageSQLFunctions.h" |
|
10 #include "nsUnicharUtils.h" |
|
11 #include <algorithm> |
|
12 |
|
13 namespace mozilla { |
|
14 namespace storage { |
|
15 |
|
16 //////////////////////////////////////////////////////////////////////////////// |
|
17 //// Local Helper Functions |
|
18 |
|
19 namespace { |
|
20 |
|
21 /** |
|
22 * Performs the LIKE comparison of a string against a pattern. For more detail |
|
23 * see http://www.sqlite.org/lang_expr.html#like. |
|
24 * |
|
25 * @param aPatternItr |
|
26 * An iterator at the start of the pattern to check for. |
|
27 * @param aPatternEnd |
|
28 * An iterator at the end of the pattern to check for. |
|
29 * @param aStringItr |
|
30 * An iterator at the start of the string to check for the pattern. |
|
31 * @param aStringEnd |
|
32 * An iterator at the end of the string to check for the pattern. |
|
33 * @param aEscapeChar |
|
34 * The character to use for escaping symbols in the pattern. |
|
35 * @return 1 if the pattern is found, 0 otherwise. |
|
36 */ |
|
37 int |
|
38 likeCompare(nsAString::const_iterator aPatternItr, |
|
39 nsAString::const_iterator aPatternEnd, |
|
40 nsAString::const_iterator aStringItr, |
|
41 nsAString::const_iterator aStringEnd, |
|
42 char16_t aEscapeChar) |
|
43 { |
|
44 const char16_t MATCH_ALL('%'); |
|
45 const char16_t MATCH_ONE('_'); |
|
46 |
|
47 bool lastWasEscape = false; |
|
48 while (aPatternItr != aPatternEnd) { |
|
49 /** |
|
50 * What we do in here is take a look at each character from the input |
|
51 * pattern, and do something with it. There are 4 possibilities: |
|
52 * 1) character is an un-escaped match-all character |
|
53 * 2) character is an un-escaped match-one character |
|
54 * 3) character is an un-escaped escape character |
|
55 * 4) character is not any of the above |
|
56 */ |
|
57 if (!lastWasEscape && *aPatternItr == MATCH_ALL) { |
|
58 // CASE 1 |
|
59 /** |
|
60 * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a |
|
61 * MATCH_ALL character. For each MATCH_ONE character, skip one character |
|
62 * in the pattern string. |
|
63 */ |
|
64 while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) { |
|
65 if (*aPatternItr == MATCH_ONE) { |
|
66 // If we've hit the end of the string we are testing, no match |
|
67 if (aStringItr == aStringEnd) |
|
68 return 0; |
|
69 aStringItr++; |
|
70 } |
|
71 aPatternItr++; |
|
72 } |
|
73 |
|
74 // If we've hit the end of the pattern string, match |
|
75 if (aPatternItr == aPatternEnd) |
|
76 return 1; |
|
77 |
|
78 while (aStringItr != aStringEnd) { |
|
79 if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd, |
|
80 aEscapeChar)) { |
|
81 // we've hit a match, so indicate this |
|
82 return 1; |
|
83 } |
|
84 aStringItr++; |
|
85 } |
|
86 |
|
87 // No match |
|
88 return 0; |
|
89 } |
|
90 else if (!lastWasEscape && *aPatternItr == MATCH_ONE) { |
|
91 // CASE 2 |
|
92 if (aStringItr == aStringEnd) { |
|
93 // If we've hit the end of the string we are testing, no match |
|
94 return 0; |
|
95 } |
|
96 aStringItr++; |
|
97 lastWasEscape = false; |
|
98 } |
|
99 else if (!lastWasEscape && *aPatternItr == aEscapeChar) { |
|
100 // CASE 3 |
|
101 lastWasEscape = true; |
|
102 } |
|
103 else { |
|
104 // CASE 4 |
|
105 if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) { |
|
106 // If we've hit a point where the strings don't match, there is no match |
|
107 return 0; |
|
108 } |
|
109 aStringItr++; |
|
110 lastWasEscape = false; |
|
111 } |
|
112 |
|
113 aPatternItr++; |
|
114 } |
|
115 |
|
116 return aStringItr == aStringEnd; |
|
117 } |
|
118 |
|
119 /** |
|
120 * This class manages a dynamic array. It can represent an array of any |
|
121 * reasonable size, but if the array is "N" elements or smaller, it will be |
|
122 * stored using fixed space inside the auto array itself. If the auto array |
|
123 * is a local variable, this internal storage will be allocated cheaply on the |
|
124 * stack, similar to nsAutoString. If a larger size is requested, the memory |
|
125 * will be dynamically allocated from the heap. Since the destructor will |
|
126 * free any heap-allocated memory, client code doesn't need to care where the |
|
127 * memory came from. |
|
128 */ |
|
129 template <class T, size_t N> class AutoArray |
|
130 { |
|
131 |
|
132 public: |
|
133 |
|
134 AutoArray(size_t size) |
|
135 : mBuffer(size <= N ? mAutoBuffer : new T[size]) |
|
136 { |
|
137 } |
|
138 |
|
139 ~AutoArray() |
|
140 { |
|
141 if (mBuffer != mAutoBuffer) |
|
142 delete[] mBuffer; |
|
143 } |
|
144 |
|
145 /** |
|
146 * Return the pointer to the allocated array. |
|
147 * @note If the array allocation failed, get() will return nullptr! |
|
148 * |
|
149 * @return the pointer to the allocated array |
|
150 */ |
|
151 T *get() |
|
152 { |
|
153 return mBuffer; |
|
154 } |
|
155 |
|
156 private: |
|
157 T *mBuffer; // Points to mAutoBuffer if we can use it, heap otherwise. |
|
158 T mAutoBuffer[N]; // The internal memory buffer that we use if we can. |
|
159 }; |
|
160 |
|
161 /** |
|
162 * Compute the Levenshtein Edit Distance between two strings. |
|
163 * |
|
164 * @param aStringS |
|
165 * a string |
|
166 * @param aStringT |
|
167 * another string |
|
168 * @param _result |
|
169 * an outparam that will receive the edit distance between the arguments |
|
170 * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc. |
|
171 */ |
|
172 int |
|
173 levenshteinDistance(const nsAString &aStringS, |
|
174 const nsAString &aStringT, |
|
175 int *_result) |
|
176 { |
|
177 // Set the result to a non-sensical value in case we encounter an error. |
|
178 *_result = -1; |
|
179 |
|
180 const uint32_t sLen = aStringS.Length(); |
|
181 const uint32_t tLen = aStringT.Length(); |
|
182 |
|
183 if (sLen == 0) { |
|
184 *_result = tLen; |
|
185 return SQLITE_OK; |
|
186 } |
|
187 if (tLen == 0) { |
|
188 *_result = sLen; |
|
189 return SQLITE_OK; |
|
190 } |
|
191 |
|
192 // Notionally, Levenshtein Distance is computed in a matrix. If we |
|
193 // assume s = "span" and t = "spam", the matrix would look like this: |
|
194 // s --> |
|
195 // t s p a n |
|
196 // | 0 1 2 3 4 |
|
197 // V s 1 * * * * |
|
198 // p 2 * * * * |
|
199 // a 3 * * * * |
|
200 // m 4 * * * * |
|
201 // |
|
202 // Note that the row width is sLen + 1 and the column height is tLen + 1, |
|
203 // where sLen is the length of the string "s" and tLen is the length of "t". |
|
204 // The first row and the first column are initialized as shown, and |
|
205 // the algorithm computes the remaining cells row-by-row, and |
|
206 // left-to-right within each row. The computation only requires that |
|
207 // we be able to see the current row and the previous one. |
|
208 |
|
209 // Allocate memory for two rows. Use AutoArray's to manage the memory |
|
210 // so we don't have to explicitly free it, and so we can avoid the expense |
|
211 // of memory allocations for relatively small strings. |
|
212 AutoArray<int, nsAutoString::kDefaultStorageSize> row1(sLen + 1); |
|
213 AutoArray<int, nsAutoString::kDefaultStorageSize> row2(sLen + 1); |
|
214 |
|
215 // Declare the raw pointers that will actually be used to access the memory. |
|
216 int *prevRow = row1.get(); |
|
217 NS_ENSURE_TRUE(prevRow, SQLITE_NOMEM); |
|
218 int *currRow = row2.get(); |
|
219 NS_ENSURE_TRUE(currRow, SQLITE_NOMEM); |
|
220 |
|
221 // Initialize the first row. |
|
222 for (uint32_t i = 0; i <= sLen; i++) |
|
223 prevRow[i] = i; |
|
224 |
|
225 const char16_t *s = aStringS.BeginReading(); |
|
226 const char16_t *t = aStringT.BeginReading(); |
|
227 |
|
228 // Compute the empty cells in the "matrix" row-by-row, starting with |
|
229 // the second row. |
|
230 for (uint32_t ti = 1; ti <= tLen; ti++) { |
|
231 |
|
232 // Initialize the first cell in this row. |
|
233 currRow[0] = ti; |
|
234 |
|
235 // Get the character from "t" that corresponds to this row. |
|
236 const char16_t tch = t[ti - 1]; |
|
237 |
|
238 // Compute the remaining cells in this row, left-to-right, |
|
239 // starting at the second column (and first character of "s"). |
|
240 for (uint32_t si = 1; si <= sLen; si++) { |
|
241 |
|
242 // Get the character from "s" that corresponds to this column, |
|
243 // compare it to the t-character, and compute the "cost". |
|
244 const char16_t sch = s[si - 1]; |
|
245 int cost = (sch == tch) ? 0 : 1; |
|
246 |
|
247 // ............ We want to calculate the value of cell "d" from |
|
248 // ...ab....... the previously calculated (or initialized) cells |
|
249 // ...cd....... "a", "b", and "c", where d = min(a', b', c'). |
|
250 // ............ |
|
251 int aPrime = prevRow[si - 1] + cost; |
|
252 int bPrime = prevRow[si] + 1; |
|
253 int cPrime = currRow[si - 1] + 1; |
|
254 currRow[si] = std::min(aPrime, std::min(bPrime, cPrime)); |
|
255 } |
|
256 |
|
257 // Advance to the next row. The current row becomes the previous |
|
258 // row and we recycle the old previous row as the new current row. |
|
259 // We don't need to re-initialize the new current row since we will |
|
260 // rewrite all of its cells anyway. |
|
261 int *oldPrevRow = prevRow; |
|
262 prevRow = currRow; |
|
263 currRow = oldPrevRow; |
|
264 } |
|
265 |
|
266 // The final result is the value of the last cell in the last row. |
|
267 // Note that that's now in the "previous" row, since we just swapped them. |
|
268 *_result = prevRow[sLen]; |
|
269 return SQLITE_OK; |
|
270 } |
|
271 |
|
272 // This struct is used only by registerFunctions below, but ISO C++98 forbids |
|
273 // instantiating a template dependent on a locally-defined type. Boo-urns! |
|
274 struct Functions { |
|
275 const char *zName; |
|
276 int nArg; |
|
277 int enc; |
|
278 void *pContext; |
|
279 void (*xFunc)(::sqlite3_context*, int, sqlite3_value**); |
|
280 }; |
|
281 |
|
282 } // anonymous namespace |
|
283 |
|
284 //////////////////////////////////////////////////////////////////////////////// |
|
285 //// Exposed Functions |
|
286 |
|
287 int |
|
288 registerFunctions(sqlite3 *aDB) |
|
289 { |
|
290 Functions functions[] = { |
|
291 {"lower", |
|
292 1, |
|
293 SQLITE_UTF16, |
|
294 0, |
|
295 caseFunction}, |
|
296 {"lower", |
|
297 1, |
|
298 SQLITE_UTF8, |
|
299 0, |
|
300 caseFunction}, |
|
301 {"upper", |
|
302 1, |
|
303 SQLITE_UTF16, |
|
304 (void*)1, |
|
305 caseFunction}, |
|
306 {"upper", |
|
307 1, |
|
308 SQLITE_UTF8, |
|
309 (void*)1, |
|
310 caseFunction}, |
|
311 |
|
312 {"like", |
|
313 2, |
|
314 SQLITE_UTF16, |
|
315 0, |
|
316 likeFunction}, |
|
317 {"like", |
|
318 2, |
|
319 SQLITE_UTF8, |
|
320 0, |
|
321 likeFunction}, |
|
322 {"like", |
|
323 3, |
|
324 SQLITE_UTF16, |
|
325 0, |
|
326 likeFunction}, |
|
327 {"like", |
|
328 3, |
|
329 SQLITE_UTF8, |
|
330 0, |
|
331 likeFunction}, |
|
332 |
|
333 {"levenshteinDistance", |
|
334 2, |
|
335 SQLITE_UTF16, |
|
336 0, |
|
337 levenshteinDistanceFunction}, |
|
338 {"levenshteinDistance", |
|
339 2, |
|
340 SQLITE_UTF8, |
|
341 0, |
|
342 levenshteinDistanceFunction}, |
|
343 }; |
|
344 |
|
345 int rv = SQLITE_OK; |
|
346 for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) { |
|
347 struct Functions *p = &functions[i]; |
|
348 rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext, |
|
349 p->xFunc, nullptr, nullptr); |
|
350 } |
|
351 |
|
352 return rv; |
|
353 } |
|
354 |
|
355 //////////////////////////////////////////////////////////////////////////////// |
|
356 //// SQL Functions |
|
357 |
|
358 void |
|
359 caseFunction(sqlite3_context *aCtx, |
|
360 int aArgc, |
|
361 sqlite3_value **aArgv) |
|
362 { |
|
363 NS_ASSERTION(1 == aArgc, "Invalid number of arguments!"); |
|
364 |
|
365 nsAutoString data(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]))); |
|
366 bool toUpper = ::sqlite3_user_data(aCtx) ? true : false; |
|
367 |
|
368 if (toUpper) |
|
369 ::ToUpperCase(data); |
|
370 else |
|
371 ::ToLowerCase(data); |
|
372 |
|
373 // Set the result. |
|
374 ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT); |
|
375 } |
|
376 |
|
377 /** |
|
378 * This implements the like() SQL function. This is used by the LIKE operator. |
|
379 * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is |
|
380 * an escape character, say E, it is implemented as 'like(B, A, E)'. |
|
381 */ |
|
382 void |
|
383 likeFunction(sqlite3_context *aCtx, |
|
384 int aArgc, |
|
385 sqlite3_value **aArgv) |
|
386 { |
|
387 NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!"); |
|
388 |
|
389 if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) { |
|
390 ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex", |
|
391 SQLITE_TOOBIG); |
|
392 return; |
|
393 } |
|
394 |
|
395 if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1])) |
|
396 return; |
|
397 |
|
398 nsDependentString A(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]))); |
|
399 nsDependentString B(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]))); |
|
400 NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!"); |
|
401 |
|
402 char16_t E = 0; |
|
403 if (3 == aArgc) |
|
404 E = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[2]))[0]; |
|
405 |
|
406 nsAString::const_iterator itrString, endString; |
|
407 A.BeginReading(itrString); |
|
408 A.EndReading(endString); |
|
409 nsAString::const_iterator itrPattern, endPattern; |
|
410 B.BeginReading(itrPattern); |
|
411 B.EndReading(endPattern); |
|
412 ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString, |
|
413 endString, E)); |
|
414 } |
|
415 |
|
416 void levenshteinDistanceFunction(sqlite3_context *aCtx, |
|
417 int aArgc, |
|
418 sqlite3_value **aArgv) |
|
419 { |
|
420 NS_ASSERTION(2 == aArgc, "Invalid number of arguments!"); |
|
421 |
|
422 // If either argument is a SQL NULL, then return SQL NULL. |
|
423 if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL || |
|
424 ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) { |
|
425 ::sqlite3_result_null(aCtx); |
|
426 return; |
|
427 } |
|
428 |
|
429 int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t); |
|
430 const char16_t *a = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])); |
|
431 |
|
432 int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t); |
|
433 const char16_t *b = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1])); |
|
434 |
|
435 // Compute the Levenshtein Distance, and return the result (or error). |
|
436 int distance = -1; |
|
437 const nsDependentString A(a, aLen); |
|
438 const nsDependentString B(b, bLen); |
|
439 int status = levenshteinDistance(A, B, &distance); |
|
440 if (status == SQLITE_OK) { |
|
441 ::sqlite3_result_int(aCtx, distance); |
|
442 } |
|
443 else if (status == SQLITE_NOMEM) { |
|
444 ::sqlite3_result_error_nomem(aCtx); |
|
445 } |
|
446 else { |
|
447 ::sqlite3_result_error(aCtx, "User function returned error code", -1); |
|
448 } |
|
449 } |
|
450 |
|
451 } // namespace storage |
|
452 } // namespace mozilla |