1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/storage/src/mozStorageSQLFunctions.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,452 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- 1.5 + * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ : 1.6 + * This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#include "mozilla/ArrayUtils.h" 1.11 + 1.12 +#include "mozStorageSQLFunctions.h" 1.13 +#include "nsUnicharUtils.h" 1.14 +#include <algorithm> 1.15 + 1.16 +namespace mozilla { 1.17 +namespace storage { 1.18 + 1.19 +//////////////////////////////////////////////////////////////////////////////// 1.20 +//// Local Helper Functions 1.21 + 1.22 +namespace { 1.23 + 1.24 +/** 1.25 + * Performs the LIKE comparison of a string against a pattern. For more detail 1.26 + * see http://www.sqlite.org/lang_expr.html#like. 1.27 + * 1.28 + * @param aPatternItr 1.29 + * An iterator at the start of the pattern to check for. 1.30 + * @param aPatternEnd 1.31 + * An iterator at the end of the pattern to check for. 1.32 + * @param aStringItr 1.33 + * An iterator at the start of the string to check for the pattern. 1.34 + * @param aStringEnd 1.35 + * An iterator at the end of the string to check for the pattern. 1.36 + * @param aEscapeChar 1.37 + * The character to use for escaping symbols in the pattern. 1.38 + * @return 1 if the pattern is found, 0 otherwise. 1.39 + */ 1.40 +int 1.41 +likeCompare(nsAString::const_iterator aPatternItr, 1.42 + nsAString::const_iterator aPatternEnd, 1.43 + nsAString::const_iterator aStringItr, 1.44 + nsAString::const_iterator aStringEnd, 1.45 + char16_t aEscapeChar) 1.46 +{ 1.47 + const char16_t MATCH_ALL('%'); 1.48 + const char16_t MATCH_ONE('_'); 1.49 + 1.50 + bool lastWasEscape = false; 1.51 + while (aPatternItr != aPatternEnd) { 1.52 + /** 1.53 + * What we do in here is take a look at each character from the input 1.54 + * pattern, and do something with it. There are 4 possibilities: 1.55 + * 1) character is an un-escaped match-all character 1.56 + * 2) character is an un-escaped match-one character 1.57 + * 3) character is an un-escaped escape character 1.58 + * 4) character is not any of the above 1.59 + */ 1.60 + if (!lastWasEscape && *aPatternItr == MATCH_ALL) { 1.61 + // CASE 1 1.62 + /** 1.63 + * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a 1.64 + * MATCH_ALL character. For each MATCH_ONE character, skip one character 1.65 + * in the pattern string. 1.66 + */ 1.67 + while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) { 1.68 + if (*aPatternItr == MATCH_ONE) { 1.69 + // If we've hit the end of the string we are testing, no match 1.70 + if (aStringItr == aStringEnd) 1.71 + return 0; 1.72 + aStringItr++; 1.73 + } 1.74 + aPatternItr++; 1.75 + } 1.76 + 1.77 + // If we've hit the end of the pattern string, match 1.78 + if (aPatternItr == aPatternEnd) 1.79 + return 1; 1.80 + 1.81 + while (aStringItr != aStringEnd) { 1.82 + if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd, 1.83 + aEscapeChar)) { 1.84 + // we've hit a match, so indicate this 1.85 + return 1; 1.86 + } 1.87 + aStringItr++; 1.88 + } 1.89 + 1.90 + // No match 1.91 + return 0; 1.92 + } 1.93 + else if (!lastWasEscape && *aPatternItr == MATCH_ONE) { 1.94 + // CASE 2 1.95 + if (aStringItr == aStringEnd) { 1.96 + // If we've hit the end of the string we are testing, no match 1.97 + return 0; 1.98 + } 1.99 + aStringItr++; 1.100 + lastWasEscape = false; 1.101 + } 1.102 + else if (!lastWasEscape && *aPatternItr == aEscapeChar) { 1.103 + // CASE 3 1.104 + lastWasEscape = true; 1.105 + } 1.106 + else { 1.107 + // CASE 4 1.108 + if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) { 1.109 + // If we've hit a point where the strings don't match, there is no match 1.110 + return 0; 1.111 + } 1.112 + aStringItr++; 1.113 + lastWasEscape = false; 1.114 + } 1.115 + 1.116 + aPatternItr++; 1.117 + } 1.118 + 1.119 + return aStringItr == aStringEnd; 1.120 +} 1.121 + 1.122 +/** 1.123 + * This class manages a dynamic array. It can represent an array of any 1.124 + * reasonable size, but if the array is "N" elements or smaller, it will be 1.125 + * stored using fixed space inside the auto array itself. If the auto array 1.126 + * is a local variable, this internal storage will be allocated cheaply on the 1.127 + * stack, similar to nsAutoString. If a larger size is requested, the memory 1.128 + * will be dynamically allocated from the heap. Since the destructor will 1.129 + * free any heap-allocated memory, client code doesn't need to care where the 1.130 + * memory came from. 1.131 + */ 1.132 +template <class T, size_t N> class AutoArray 1.133 +{ 1.134 + 1.135 +public: 1.136 + 1.137 + AutoArray(size_t size) 1.138 + : mBuffer(size <= N ? mAutoBuffer : new T[size]) 1.139 + { 1.140 + } 1.141 + 1.142 + ~AutoArray() 1.143 + { 1.144 + if (mBuffer != mAutoBuffer) 1.145 + delete[] mBuffer; 1.146 + } 1.147 + 1.148 + /** 1.149 + * Return the pointer to the allocated array. 1.150 + * @note If the array allocation failed, get() will return nullptr! 1.151 + * 1.152 + * @return the pointer to the allocated array 1.153 + */ 1.154 + T *get() 1.155 + { 1.156 + return mBuffer; 1.157 + } 1.158 + 1.159 +private: 1.160 + T *mBuffer; // Points to mAutoBuffer if we can use it, heap otherwise. 1.161 + T mAutoBuffer[N]; // The internal memory buffer that we use if we can. 1.162 +}; 1.163 + 1.164 +/** 1.165 + * Compute the Levenshtein Edit Distance between two strings. 1.166 + * 1.167 + * @param aStringS 1.168 + * a string 1.169 + * @param aStringT 1.170 + * another string 1.171 + * @param _result 1.172 + * an outparam that will receive the edit distance between the arguments 1.173 + * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc. 1.174 + */ 1.175 +int 1.176 +levenshteinDistance(const nsAString &aStringS, 1.177 + const nsAString &aStringT, 1.178 + int *_result) 1.179 +{ 1.180 + // Set the result to a non-sensical value in case we encounter an error. 1.181 + *_result = -1; 1.182 + 1.183 + const uint32_t sLen = aStringS.Length(); 1.184 + const uint32_t tLen = aStringT.Length(); 1.185 + 1.186 + if (sLen == 0) { 1.187 + *_result = tLen; 1.188 + return SQLITE_OK; 1.189 + } 1.190 + if (tLen == 0) { 1.191 + *_result = sLen; 1.192 + return SQLITE_OK; 1.193 + } 1.194 + 1.195 + // Notionally, Levenshtein Distance is computed in a matrix. If we 1.196 + // assume s = "span" and t = "spam", the matrix would look like this: 1.197 + // s --> 1.198 + // t s p a n 1.199 + // | 0 1 2 3 4 1.200 + // V s 1 * * * * 1.201 + // p 2 * * * * 1.202 + // a 3 * * * * 1.203 + // m 4 * * * * 1.204 + // 1.205 + // Note that the row width is sLen + 1 and the column height is tLen + 1, 1.206 + // where sLen is the length of the string "s" and tLen is the length of "t". 1.207 + // The first row and the first column are initialized as shown, and 1.208 + // the algorithm computes the remaining cells row-by-row, and 1.209 + // left-to-right within each row. The computation only requires that 1.210 + // we be able to see the current row and the previous one. 1.211 + 1.212 + // Allocate memory for two rows. Use AutoArray's to manage the memory 1.213 + // so we don't have to explicitly free it, and so we can avoid the expense 1.214 + // of memory allocations for relatively small strings. 1.215 + AutoArray<int, nsAutoString::kDefaultStorageSize> row1(sLen + 1); 1.216 + AutoArray<int, nsAutoString::kDefaultStorageSize> row2(sLen + 1); 1.217 + 1.218 + // Declare the raw pointers that will actually be used to access the memory. 1.219 + int *prevRow = row1.get(); 1.220 + NS_ENSURE_TRUE(prevRow, SQLITE_NOMEM); 1.221 + int *currRow = row2.get(); 1.222 + NS_ENSURE_TRUE(currRow, SQLITE_NOMEM); 1.223 + 1.224 + // Initialize the first row. 1.225 + for (uint32_t i = 0; i <= sLen; i++) 1.226 + prevRow[i] = i; 1.227 + 1.228 + const char16_t *s = aStringS.BeginReading(); 1.229 + const char16_t *t = aStringT.BeginReading(); 1.230 + 1.231 + // Compute the empty cells in the "matrix" row-by-row, starting with 1.232 + // the second row. 1.233 + for (uint32_t ti = 1; ti <= tLen; ti++) { 1.234 + 1.235 + // Initialize the first cell in this row. 1.236 + currRow[0] = ti; 1.237 + 1.238 + // Get the character from "t" that corresponds to this row. 1.239 + const char16_t tch = t[ti - 1]; 1.240 + 1.241 + // Compute the remaining cells in this row, left-to-right, 1.242 + // starting at the second column (and first character of "s"). 1.243 + for (uint32_t si = 1; si <= sLen; si++) { 1.244 + 1.245 + // Get the character from "s" that corresponds to this column, 1.246 + // compare it to the t-character, and compute the "cost". 1.247 + const char16_t sch = s[si - 1]; 1.248 + int cost = (sch == tch) ? 0 : 1; 1.249 + 1.250 + // ............ We want to calculate the value of cell "d" from 1.251 + // ...ab....... the previously calculated (or initialized) cells 1.252 + // ...cd....... "a", "b", and "c", where d = min(a', b', c'). 1.253 + // ............ 1.254 + int aPrime = prevRow[si - 1] + cost; 1.255 + int bPrime = prevRow[si] + 1; 1.256 + int cPrime = currRow[si - 1] + 1; 1.257 + currRow[si] = std::min(aPrime, std::min(bPrime, cPrime)); 1.258 + } 1.259 + 1.260 + // Advance to the next row. The current row becomes the previous 1.261 + // row and we recycle the old previous row as the new current row. 1.262 + // We don't need to re-initialize the new current row since we will 1.263 + // rewrite all of its cells anyway. 1.264 + int *oldPrevRow = prevRow; 1.265 + prevRow = currRow; 1.266 + currRow = oldPrevRow; 1.267 + } 1.268 + 1.269 + // The final result is the value of the last cell in the last row. 1.270 + // Note that that's now in the "previous" row, since we just swapped them. 1.271 + *_result = prevRow[sLen]; 1.272 + return SQLITE_OK; 1.273 +} 1.274 + 1.275 +// This struct is used only by registerFunctions below, but ISO C++98 forbids 1.276 +// instantiating a template dependent on a locally-defined type. Boo-urns! 1.277 +struct Functions { 1.278 + const char *zName; 1.279 + int nArg; 1.280 + int enc; 1.281 + void *pContext; 1.282 + void (*xFunc)(::sqlite3_context*, int, sqlite3_value**); 1.283 +}; 1.284 + 1.285 +} // anonymous namespace 1.286 + 1.287 +//////////////////////////////////////////////////////////////////////////////// 1.288 +//// Exposed Functions 1.289 + 1.290 +int 1.291 +registerFunctions(sqlite3 *aDB) 1.292 +{ 1.293 + Functions functions[] = { 1.294 + {"lower", 1.295 + 1, 1.296 + SQLITE_UTF16, 1.297 + 0, 1.298 + caseFunction}, 1.299 + {"lower", 1.300 + 1, 1.301 + SQLITE_UTF8, 1.302 + 0, 1.303 + caseFunction}, 1.304 + {"upper", 1.305 + 1, 1.306 + SQLITE_UTF16, 1.307 + (void*)1, 1.308 + caseFunction}, 1.309 + {"upper", 1.310 + 1, 1.311 + SQLITE_UTF8, 1.312 + (void*)1, 1.313 + caseFunction}, 1.314 + 1.315 + {"like", 1.316 + 2, 1.317 + SQLITE_UTF16, 1.318 + 0, 1.319 + likeFunction}, 1.320 + {"like", 1.321 + 2, 1.322 + SQLITE_UTF8, 1.323 + 0, 1.324 + likeFunction}, 1.325 + {"like", 1.326 + 3, 1.327 + SQLITE_UTF16, 1.328 + 0, 1.329 + likeFunction}, 1.330 + {"like", 1.331 + 3, 1.332 + SQLITE_UTF8, 1.333 + 0, 1.334 + likeFunction}, 1.335 + 1.336 + {"levenshteinDistance", 1.337 + 2, 1.338 + SQLITE_UTF16, 1.339 + 0, 1.340 + levenshteinDistanceFunction}, 1.341 + {"levenshteinDistance", 1.342 + 2, 1.343 + SQLITE_UTF8, 1.344 + 0, 1.345 + levenshteinDistanceFunction}, 1.346 + }; 1.347 + 1.348 + int rv = SQLITE_OK; 1.349 + for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) { 1.350 + struct Functions *p = &functions[i]; 1.351 + rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext, 1.352 + p->xFunc, nullptr, nullptr); 1.353 + } 1.354 + 1.355 + return rv; 1.356 +} 1.357 + 1.358 +//////////////////////////////////////////////////////////////////////////////// 1.359 +//// SQL Functions 1.360 + 1.361 +void 1.362 +caseFunction(sqlite3_context *aCtx, 1.363 + int aArgc, 1.364 + sqlite3_value **aArgv) 1.365 +{ 1.366 + NS_ASSERTION(1 == aArgc, "Invalid number of arguments!"); 1.367 + 1.368 + nsAutoString data(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]))); 1.369 + bool toUpper = ::sqlite3_user_data(aCtx) ? true : false; 1.370 + 1.371 + if (toUpper) 1.372 + ::ToUpperCase(data); 1.373 + else 1.374 + ::ToLowerCase(data); 1.375 + 1.376 + // Set the result. 1.377 + ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT); 1.378 +} 1.379 + 1.380 +/** 1.381 + * This implements the like() SQL function. This is used by the LIKE operator. 1.382 + * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is 1.383 + * an escape character, say E, it is implemented as 'like(B, A, E)'. 1.384 + */ 1.385 +void 1.386 +likeFunction(sqlite3_context *aCtx, 1.387 + int aArgc, 1.388 + sqlite3_value **aArgv) 1.389 +{ 1.390 + NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!"); 1.391 + 1.392 + if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) { 1.393 + ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex", 1.394 + SQLITE_TOOBIG); 1.395 + return; 1.396 + } 1.397 + 1.398 + if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1])) 1.399 + return; 1.400 + 1.401 + nsDependentString A(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]))); 1.402 + nsDependentString B(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]))); 1.403 + NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!"); 1.404 + 1.405 + char16_t E = 0; 1.406 + if (3 == aArgc) 1.407 + E = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[2]))[0]; 1.408 + 1.409 + nsAString::const_iterator itrString, endString; 1.410 + A.BeginReading(itrString); 1.411 + A.EndReading(endString); 1.412 + nsAString::const_iterator itrPattern, endPattern; 1.413 + B.BeginReading(itrPattern); 1.414 + B.EndReading(endPattern); 1.415 + ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString, 1.416 + endString, E)); 1.417 +} 1.418 + 1.419 +void levenshteinDistanceFunction(sqlite3_context *aCtx, 1.420 + int aArgc, 1.421 + sqlite3_value **aArgv) 1.422 +{ 1.423 + NS_ASSERTION(2 == aArgc, "Invalid number of arguments!"); 1.424 + 1.425 + // If either argument is a SQL NULL, then return SQL NULL. 1.426 + if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL || 1.427 + ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) { 1.428 + ::sqlite3_result_null(aCtx); 1.429 + return; 1.430 + } 1.431 + 1.432 + int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t); 1.433 + const char16_t *a = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])); 1.434 + 1.435 + int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t); 1.436 + const char16_t *b = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1])); 1.437 + 1.438 + // Compute the Levenshtein Distance, and return the result (or error). 1.439 + int distance = -1; 1.440 + const nsDependentString A(a, aLen); 1.441 + const nsDependentString B(b, bLen); 1.442 + int status = levenshteinDistance(A, B, &distance); 1.443 + if (status == SQLITE_OK) { 1.444 + ::sqlite3_result_int(aCtx, distance); 1.445 + } 1.446 + else if (status == SQLITE_NOMEM) { 1.447 + ::sqlite3_result_error_nomem(aCtx); 1.448 + } 1.449 + else { 1.450 + ::sqlite3_result_error(aCtx, "User function returned error code", -1); 1.451 + } 1.452 +} 1.453 + 1.454 +} // namespace storage 1.455 +} // namespace mozilla