storage/src/mozStorageSQLFunctions.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
michael@0 2 * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
michael@0 3 * This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 #include "mozilla/ArrayUtils.h"
michael@0 8
michael@0 9 #include "mozStorageSQLFunctions.h"
michael@0 10 #include "nsUnicharUtils.h"
michael@0 11 #include <algorithm>
michael@0 12
michael@0 13 namespace mozilla {
michael@0 14 namespace storage {
michael@0 15
michael@0 16 ////////////////////////////////////////////////////////////////////////////////
michael@0 17 //// Local Helper Functions
michael@0 18
michael@0 19 namespace {
michael@0 20
michael@0 21 /**
michael@0 22 * Performs the LIKE comparison of a string against a pattern. For more detail
michael@0 23 * see http://www.sqlite.org/lang_expr.html#like.
michael@0 24 *
michael@0 25 * @param aPatternItr
michael@0 26 * An iterator at the start of the pattern to check for.
michael@0 27 * @param aPatternEnd
michael@0 28 * An iterator at the end of the pattern to check for.
michael@0 29 * @param aStringItr
michael@0 30 * An iterator at the start of the string to check for the pattern.
michael@0 31 * @param aStringEnd
michael@0 32 * An iterator at the end of the string to check for the pattern.
michael@0 33 * @param aEscapeChar
michael@0 34 * The character to use for escaping symbols in the pattern.
michael@0 35 * @return 1 if the pattern is found, 0 otherwise.
michael@0 36 */
michael@0 37 int
michael@0 38 likeCompare(nsAString::const_iterator aPatternItr,
michael@0 39 nsAString::const_iterator aPatternEnd,
michael@0 40 nsAString::const_iterator aStringItr,
michael@0 41 nsAString::const_iterator aStringEnd,
michael@0 42 char16_t aEscapeChar)
michael@0 43 {
michael@0 44 const char16_t MATCH_ALL('%');
michael@0 45 const char16_t MATCH_ONE('_');
michael@0 46
michael@0 47 bool lastWasEscape = false;
michael@0 48 while (aPatternItr != aPatternEnd) {
michael@0 49 /**
michael@0 50 * What we do in here is take a look at each character from the input
michael@0 51 * pattern, and do something with it. There are 4 possibilities:
michael@0 52 * 1) character is an un-escaped match-all character
michael@0 53 * 2) character is an un-escaped match-one character
michael@0 54 * 3) character is an un-escaped escape character
michael@0 55 * 4) character is not any of the above
michael@0 56 */
michael@0 57 if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
michael@0 58 // CASE 1
michael@0 59 /**
michael@0 60 * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
michael@0 61 * MATCH_ALL character. For each MATCH_ONE character, skip one character
michael@0 62 * in the pattern string.
michael@0 63 */
michael@0 64 while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) {
michael@0 65 if (*aPatternItr == MATCH_ONE) {
michael@0 66 // If we've hit the end of the string we are testing, no match
michael@0 67 if (aStringItr == aStringEnd)
michael@0 68 return 0;
michael@0 69 aStringItr++;
michael@0 70 }
michael@0 71 aPatternItr++;
michael@0 72 }
michael@0 73
michael@0 74 // If we've hit the end of the pattern string, match
michael@0 75 if (aPatternItr == aPatternEnd)
michael@0 76 return 1;
michael@0 77
michael@0 78 while (aStringItr != aStringEnd) {
michael@0 79 if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
michael@0 80 aEscapeChar)) {
michael@0 81 // we've hit a match, so indicate this
michael@0 82 return 1;
michael@0 83 }
michael@0 84 aStringItr++;
michael@0 85 }
michael@0 86
michael@0 87 // No match
michael@0 88 return 0;
michael@0 89 }
michael@0 90 else if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
michael@0 91 // CASE 2
michael@0 92 if (aStringItr == aStringEnd) {
michael@0 93 // If we've hit the end of the string we are testing, no match
michael@0 94 return 0;
michael@0 95 }
michael@0 96 aStringItr++;
michael@0 97 lastWasEscape = false;
michael@0 98 }
michael@0 99 else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
michael@0 100 // CASE 3
michael@0 101 lastWasEscape = true;
michael@0 102 }
michael@0 103 else {
michael@0 104 // CASE 4
michael@0 105 if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
michael@0 106 // If we've hit a point where the strings don't match, there is no match
michael@0 107 return 0;
michael@0 108 }
michael@0 109 aStringItr++;
michael@0 110 lastWasEscape = false;
michael@0 111 }
michael@0 112
michael@0 113 aPatternItr++;
michael@0 114 }
michael@0 115
michael@0 116 return aStringItr == aStringEnd;
michael@0 117 }
michael@0 118
michael@0 119 /**
michael@0 120 * This class manages a dynamic array. It can represent an array of any
michael@0 121 * reasonable size, but if the array is "N" elements or smaller, it will be
michael@0 122 * stored using fixed space inside the auto array itself. If the auto array
michael@0 123 * is a local variable, this internal storage will be allocated cheaply on the
michael@0 124 * stack, similar to nsAutoString. If a larger size is requested, the memory
michael@0 125 * will be dynamically allocated from the heap. Since the destructor will
michael@0 126 * free any heap-allocated memory, client code doesn't need to care where the
michael@0 127 * memory came from.
michael@0 128 */
michael@0 129 template <class T, size_t N> class AutoArray
michael@0 130 {
michael@0 131
michael@0 132 public:
michael@0 133
michael@0 134 AutoArray(size_t size)
michael@0 135 : mBuffer(size <= N ? mAutoBuffer : new T[size])
michael@0 136 {
michael@0 137 }
michael@0 138
michael@0 139 ~AutoArray()
michael@0 140 {
michael@0 141 if (mBuffer != mAutoBuffer)
michael@0 142 delete[] mBuffer;
michael@0 143 }
michael@0 144
michael@0 145 /**
michael@0 146 * Return the pointer to the allocated array.
michael@0 147 * @note If the array allocation failed, get() will return nullptr!
michael@0 148 *
michael@0 149 * @return the pointer to the allocated array
michael@0 150 */
michael@0 151 T *get()
michael@0 152 {
michael@0 153 return mBuffer;
michael@0 154 }
michael@0 155
michael@0 156 private:
michael@0 157 T *mBuffer; // Points to mAutoBuffer if we can use it, heap otherwise.
michael@0 158 T mAutoBuffer[N]; // The internal memory buffer that we use if we can.
michael@0 159 };
michael@0 160
michael@0 161 /**
michael@0 162 * Compute the Levenshtein Edit Distance between two strings.
michael@0 163 *
michael@0 164 * @param aStringS
michael@0 165 * a string
michael@0 166 * @param aStringT
michael@0 167 * another string
michael@0 168 * @param _result
michael@0 169 * an outparam that will receive the edit distance between the arguments
michael@0 170 * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
michael@0 171 */
michael@0 172 int
michael@0 173 levenshteinDistance(const nsAString &aStringS,
michael@0 174 const nsAString &aStringT,
michael@0 175 int *_result)
michael@0 176 {
michael@0 177 // Set the result to a non-sensical value in case we encounter an error.
michael@0 178 *_result = -1;
michael@0 179
michael@0 180 const uint32_t sLen = aStringS.Length();
michael@0 181 const uint32_t tLen = aStringT.Length();
michael@0 182
michael@0 183 if (sLen == 0) {
michael@0 184 *_result = tLen;
michael@0 185 return SQLITE_OK;
michael@0 186 }
michael@0 187 if (tLen == 0) {
michael@0 188 *_result = sLen;
michael@0 189 return SQLITE_OK;
michael@0 190 }
michael@0 191
michael@0 192 // Notionally, Levenshtein Distance is computed in a matrix. If we
michael@0 193 // assume s = "span" and t = "spam", the matrix would look like this:
michael@0 194 // s -->
michael@0 195 // t s p a n
michael@0 196 // | 0 1 2 3 4
michael@0 197 // V s 1 * * * *
michael@0 198 // p 2 * * * *
michael@0 199 // a 3 * * * *
michael@0 200 // m 4 * * * *
michael@0 201 //
michael@0 202 // Note that the row width is sLen + 1 and the column height is tLen + 1,
michael@0 203 // where sLen is the length of the string "s" and tLen is the length of "t".
michael@0 204 // The first row and the first column are initialized as shown, and
michael@0 205 // the algorithm computes the remaining cells row-by-row, and
michael@0 206 // left-to-right within each row. The computation only requires that
michael@0 207 // we be able to see the current row and the previous one.
michael@0 208
michael@0 209 // Allocate memory for two rows. Use AutoArray's to manage the memory
michael@0 210 // so we don't have to explicitly free it, and so we can avoid the expense
michael@0 211 // of memory allocations for relatively small strings.
michael@0 212 AutoArray<int, nsAutoString::kDefaultStorageSize> row1(sLen + 1);
michael@0 213 AutoArray<int, nsAutoString::kDefaultStorageSize> row2(sLen + 1);
michael@0 214
michael@0 215 // Declare the raw pointers that will actually be used to access the memory.
michael@0 216 int *prevRow = row1.get();
michael@0 217 NS_ENSURE_TRUE(prevRow, SQLITE_NOMEM);
michael@0 218 int *currRow = row2.get();
michael@0 219 NS_ENSURE_TRUE(currRow, SQLITE_NOMEM);
michael@0 220
michael@0 221 // Initialize the first row.
michael@0 222 for (uint32_t i = 0; i <= sLen; i++)
michael@0 223 prevRow[i] = i;
michael@0 224
michael@0 225 const char16_t *s = aStringS.BeginReading();
michael@0 226 const char16_t *t = aStringT.BeginReading();
michael@0 227
michael@0 228 // Compute the empty cells in the "matrix" row-by-row, starting with
michael@0 229 // the second row.
michael@0 230 for (uint32_t ti = 1; ti <= tLen; ti++) {
michael@0 231
michael@0 232 // Initialize the first cell in this row.
michael@0 233 currRow[0] = ti;
michael@0 234
michael@0 235 // Get the character from "t" that corresponds to this row.
michael@0 236 const char16_t tch = t[ti - 1];
michael@0 237
michael@0 238 // Compute the remaining cells in this row, left-to-right,
michael@0 239 // starting at the second column (and first character of "s").
michael@0 240 for (uint32_t si = 1; si <= sLen; si++) {
michael@0 241
michael@0 242 // Get the character from "s" that corresponds to this column,
michael@0 243 // compare it to the t-character, and compute the "cost".
michael@0 244 const char16_t sch = s[si - 1];
michael@0 245 int cost = (sch == tch) ? 0 : 1;
michael@0 246
michael@0 247 // ............ We want to calculate the value of cell "d" from
michael@0 248 // ...ab....... the previously calculated (or initialized) cells
michael@0 249 // ...cd....... "a", "b", and "c", where d = min(a', b', c').
michael@0 250 // ............
michael@0 251 int aPrime = prevRow[si - 1] + cost;
michael@0 252 int bPrime = prevRow[si] + 1;
michael@0 253 int cPrime = currRow[si - 1] + 1;
michael@0 254 currRow[si] = std::min(aPrime, std::min(bPrime, cPrime));
michael@0 255 }
michael@0 256
michael@0 257 // Advance to the next row. The current row becomes the previous
michael@0 258 // row and we recycle the old previous row as the new current row.
michael@0 259 // We don't need to re-initialize the new current row since we will
michael@0 260 // rewrite all of its cells anyway.
michael@0 261 int *oldPrevRow = prevRow;
michael@0 262 prevRow = currRow;
michael@0 263 currRow = oldPrevRow;
michael@0 264 }
michael@0 265
michael@0 266 // The final result is the value of the last cell in the last row.
michael@0 267 // Note that that's now in the "previous" row, since we just swapped them.
michael@0 268 *_result = prevRow[sLen];
michael@0 269 return SQLITE_OK;
michael@0 270 }
michael@0 271
michael@0 272 // This struct is used only by registerFunctions below, but ISO C++98 forbids
michael@0 273 // instantiating a template dependent on a locally-defined type. Boo-urns!
michael@0 274 struct Functions {
michael@0 275 const char *zName;
michael@0 276 int nArg;
michael@0 277 int enc;
michael@0 278 void *pContext;
michael@0 279 void (*xFunc)(::sqlite3_context*, int, sqlite3_value**);
michael@0 280 };
michael@0 281
michael@0 282 } // anonymous namespace
michael@0 283
michael@0 284 ////////////////////////////////////////////////////////////////////////////////
michael@0 285 //// Exposed Functions
michael@0 286
michael@0 287 int
michael@0 288 registerFunctions(sqlite3 *aDB)
michael@0 289 {
michael@0 290 Functions functions[] = {
michael@0 291 {"lower",
michael@0 292 1,
michael@0 293 SQLITE_UTF16,
michael@0 294 0,
michael@0 295 caseFunction},
michael@0 296 {"lower",
michael@0 297 1,
michael@0 298 SQLITE_UTF8,
michael@0 299 0,
michael@0 300 caseFunction},
michael@0 301 {"upper",
michael@0 302 1,
michael@0 303 SQLITE_UTF16,
michael@0 304 (void*)1,
michael@0 305 caseFunction},
michael@0 306 {"upper",
michael@0 307 1,
michael@0 308 SQLITE_UTF8,
michael@0 309 (void*)1,
michael@0 310 caseFunction},
michael@0 311
michael@0 312 {"like",
michael@0 313 2,
michael@0 314 SQLITE_UTF16,
michael@0 315 0,
michael@0 316 likeFunction},
michael@0 317 {"like",
michael@0 318 2,
michael@0 319 SQLITE_UTF8,
michael@0 320 0,
michael@0 321 likeFunction},
michael@0 322 {"like",
michael@0 323 3,
michael@0 324 SQLITE_UTF16,
michael@0 325 0,
michael@0 326 likeFunction},
michael@0 327 {"like",
michael@0 328 3,
michael@0 329 SQLITE_UTF8,
michael@0 330 0,
michael@0 331 likeFunction},
michael@0 332
michael@0 333 {"levenshteinDistance",
michael@0 334 2,
michael@0 335 SQLITE_UTF16,
michael@0 336 0,
michael@0 337 levenshteinDistanceFunction},
michael@0 338 {"levenshteinDistance",
michael@0 339 2,
michael@0 340 SQLITE_UTF8,
michael@0 341 0,
michael@0 342 levenshteinDistanceFunction},
michael@0 343 };
michael@0 344
michael@0 345 int rv = SQLITE_OK;
michael@0 346 for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) {
michael@0 347 struct Functions *p = &functions[i];
michael@0 348 rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext,
michael@0 349 p->xFunc, nullptr, nullptr);
michael@0 350 }
michael@0 351
michael@0 352 return rv;
michael@0 353 }
michael@0 354
michael@0 355 ////////////////////////////////////////////////////////////////////////////////
michael@0 356 //// SQL Functions
michael@0 357
michael@0 358 void
michael@0 359 caseFunction(sqlite3_context *aCtx,
michael@0 360 int aArgc,
michael@0 361 sqlite3_value **aArgv)
michael@0 362 {
michael@0 363 NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
michael@0 364
michael@0 365 nsAutoString data(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])));
michael@0 366 bool toUpper = ::sqlite3_user_data(aCtx) ? true : false;
michael@0 367
michael@0 368 if (toUpper)
michael@0 369 ::ToUpperCase(data);
michael@0 370 else
michael@0 371 ::ToLowerCase(data);
michael@0 372
michael@0 373 // Set the result.
michael@0 374 ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT);
michael@0 375 }
michael@0 376
michael@0 377 /**
michael@0 378 * This implements the like() SQL function. This is used by the LIKE operator.
michael@0 379 * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
michael@0 380 * an escape character, say E, it is implemented as 'like(B, A, E)'.
michael@0 381 */
michael@0 382 void
michael@0 383 likeFunction(sqlite3_context *aCtx,
michael@0 384 int aArgc,
michael@0 385 sqlite3_value **aArgv)
michael@0 386 {
michael@0 387 NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");
michael@0 388
michael@0 389 if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) {
michael@0 390 ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex",
michael@0 391 SQLITE_TOOBIG);
michael@0 392 return;
michael@0 393 }
michael@0 394
michael@0 395 if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1]))
michael@0 396 return;
michael@0 397
michael@0 398 nsDependentString A(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1])));
michael@0 399 nsDependentString B(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])));
michael@0 400 NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");
michael@0 401
michael@0 402 char16_t E = 0;
michael@0 403 if (3 == aArgc)
michael@0 404 E = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[2]))[0];
michael@0 405
michael@0 406 nsAString::const_iterator itrString, endString;
michael@0 407 A.BeginReading(itrString);
michael@0 408 A.EndReading(endString);
michael@0 409 nsAString::const_iterator itrPattern, endPattern;
michael@0 410 B.BeginReading(itrPattern);
michael@0 411 B.EndReading(endPattern);
michael@0 412 ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString,
michael@0 413 endString, E));
michael@0 414 }
michael@0 415
michael@0 416 void levenshteinDistanceFunction(sqlite3_context *aCtx,
michael@0 417 int aArgc,
michael@0 418 sqlite3_value **aArgv)
michael@0 419 {
michael@0 420 NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");
michael@0 421
michael@0 422 // If either argument is a SQL NULL, then return SQL NULL.
michael@0 423 if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
michael@0 424 ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
michael@0 425 ::sqlite3_result_null(aCtx);
michael@0 426 return;
michael@0 427 }
michael@0 428
michael@0 429 int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
michael@0 430 const char16_t *a = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]));
michael@0 431
michael@0 432 int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
michael@0 433 const char16_t *b = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]));
michael@0 434
michael@0 435 // Compute the Levenshtein Distance, and return the result (or error).
michael@0 436 int distance = -1;
michael@0 437 const nsDependentString A(a, aLen);
michael@0 438 const nsDependentString B(b, bLen);
michael@0 439 int status = levenshteinDistance(A, B, &distance);
michael@0 440 if (status == SQLITE_OK) {
michael@0 441 ::sqlite3_result_int(aCtx, distance);
michael@0 442 }
michael@0 443 else if (status == SQLITE_NOMEM) {
michael@0 444 ::sqlite3_result_error_nomem(aCtx);
michael@0 445 }
michael@0 446 else {
michael@0 447 ::sqlite3_result_error(aCtx, "User function returned error code", -1);
michael@0 448 }
michael@0 449 }
michael@0 450
michael@0 451 } // namespace storage
michael@0 452 } // namespace mozilla

mercurial