michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- michael@0: * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ : michael@0: * This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "mozilla/ArrayUtils.h" michael@0: michael@0: #include "mozStorageSQLFunctions.h" michael@0: #include "nsUnicharUtils.h" michael@0: #include michael@0: michael@0: namespace mozilla { michael@0: namespace storage { michael@0: michael@0: //////////////////////////////////////////////////////////////////////////////// michael@0: //// Local Helper Functions michael@0: michael@0: namespace { michael@0: michael@0: /** michael@0: * Performs the LIKE comparison of a string against a pattern. For more detail michael@0: * see http://www.sqlite.org/lang_expr.html#like. michael@0: * michael@0: * @param aPatternItr michael@0: * An iterator at the start of the pattern to check for. michael@0: * @param aPatternEnd michael@0: * An iterator at the end of the pattern to check for. michael@0: * @param aStringItr michael@0: * An iterator at the start of the string to check for the pattern. michael@0: * @param aStringEnd michael@0: * An iterator at the end of the string to check for the pattern. michael@0: * @param aEscapeChar michael@0: * The character to use for escaping symbols in the pattern. michael@0: * @return 1 if the pattern is found, 0 otherwise. michael@0: */ michael@0: int michael@0: likeCompare(nsAString::const_iterator aPatternItr, michael@0: nsAString::const_iterator aPatternEnd, michael@0: nsAString::const_iterator aStringItr, michael@0: nsAString::const_iterator aStringEnd, michael@0: char16_t aEscapeChar) michael@0: { michael@0: const char16_t MATCH_ALL('%'); michael@0: const char16_t MATCH_ONE('_'); michael@0: michael@0: bool lastWasEscape = false; michael@0: while (aPatternItr != aPatternEnd) { michael@0: /** michael@0: * What we do in here is take a look at each character from the input michael@0: * pattern, and do something with it. There are 4 possibilities: michael@0: * 1) character is an un-escaped match-all character michael@0: * 2) character is an un-escaped match-one character michael@0: * 3) character is an un-escaped escape character michael@0: * 4) character is not any of the above michael@0: */ michael@0: if (!lastWasEscape && *aPatternItr == MATCH_ALL) { michael@0: // CASE 1 michael@0: /** michael@0: * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a michael@0: * MATCH_ALL character. For each MATCH_ONE character, skip one character michael@0: * in the pattern string. michael@0: */ michael@0: while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) { michael@0: if (*aPatternItr == MATCH_ONE) { michael@0: // If we've hit the end of the string we are testing, no match michael@0: if (aStringItr == aStringEnd) michael@0: return 0; michael@0: aStringItr++; michael@0: } michael@0: aPatternItr++; michael@0: } michael@0: michael@0: // If we've hit the end of the pattern string, match michael@0: if (aPatternItr == aPatternEnd) michael@0: return 1; michael@0: michael@0: while (aStringItr != aStringEnd) { michael@0: if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd, michael@0: aEscapeChar)) { michael@0: // we've hit a match, so indicate this michael@0: return 1; michael@0: } michael@0: aStringItr++; michael@0: } michael@0: michael@0: // No match michael@0: return 0; michael@0: } michael@0: else if (!lastWasEscape && *aPatternItr == MATCH_ONE) { michael@0: // CASE 2 michael@0: if (aStringItr == aStringEnd) { michael@0: // If we've hit the end of the string we are testing, no match michael@0: return 0; michael@0: } michael@0: aStringItr++; michael@0: lastWasEscape = false; michael@0: } michael@0: else if (!lastWasEscape && *aPatternItr == aEscapeChar) { michael@0: // CASE 3 michael@0: lastWasEscape = true; michael@0: } michael@0: else { michael@0: // CASE 4 michael@0: if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) { michael@0: // If we've hit a point where the strings don't match, there is no match michael@0: return 0; michael@0: } michael@0: aStringItr++; michael@0: lastWasEscape = false; michael@0: } michael@0: michael@0: aPatternItr++; michael@0: } michael@0: michael@0: return aStringItr == aStringEnd; michael@0: } michael@0: michael@0: /** michael@0: * This class manages a dynamic array. It can represent an array of any michael@0: * reasonable size, but if the array is "N" elements or smaller, it will be michael@0: * stored using fixed space inside the auto array itself. If the auto array michael@0: * is a local variable, this internal storage will be allocated cheaply on the michael@0: * stack, similar to nsAutoString. If a larger size is requested, the memory michael@0: * will be dynamically allocated from the heap. Since the destructor will michael@0: * free any heap-allocated memory, client code doesn't need to care where the michael@0: * memory came from. michael@0: */ michael@0: template class AutoArray michael@0: { michael@0: michael@0: public: michael@0: michael@0: AutoArray(size_t size) michael@0: : mBuffer(size <= N ? mAutoBuffer : new T[size]) michael@0: { michael@0: } michael@0: michael@0: ~AutoArray() michael@0: { michael@0: if (mBuffer != mAutoBuffer) michael@0: delete[] mBuffer; michael@0: } michael@0: michael@0: /** michael@0: * Return the pointer to the allocated array. michael@0: * @note If the array allocation failed, get() will return nullptr! michael@0: * michael@0: * @return the pointer to the allocated array michael@0: */ michael@0: T *get() michael@0: { michael@0: return mBuffer; michael@0: } michael@0: michael@0: private: michael@0: T *mBuffer; // Points to mAutoBuffer if we can use it, heap otherwise. michael@0: T mAutoBuffer[N]; // The internal memory buffer that we use if we can. michael@0: }; michael@0: michael@0: /** michael@0: * Compute the Levenshtein Edit Distance between two strings. michael@0: * michael@0: * @param aStringS michael@0: * a string michael@0: * @param aStringT michael@0: * another string michael@0: * @param _result michael@0: * an outparam that will receive the edit distance between the arguments michael@0: * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc. michael@0: */ michael@0: int michael@0: levenshteinDistance(const nsAString &aStringS, michael@0: const nsAString &aStringT, michael@0: int *_result) michael@0: { michael@0: // Set the result to a non-sensical value in case we encounter an error. michael@0: *_result = -1; michael@0: michael@0: const uint32_t sLen = aStringS.Length(); michael@0: const uint32_t tLen = aStringT.Length(); michael@0: michael@0: if (sLen == 0) { michael@0: *_result = tLen; michael@0: return SQLITE_OK; michael@0: } michael@0: if (tLen == 0) { michael@0: *_result = sLen; michael@0: return SQLITE_OK; michael@0: } michael@0: michael@0: // Notionally, Levenshtein Distance is computed in a matrix. If we michael@0: // assume s = "span" and t = "spam", the matrix would look like this: michael@0: // s --> michael@0: // t s p a n michael@0: // | 0 1 2 3 4 michael@0: // V s 1 * * * * michael@0: // p 2 * * * * michael@0: // a 3 * * * * michael@0: // m 4 * * * * michael@0: // michael@0: // Note that the row width is sLen + 1 and the column height is tLen + 1, michael@0: // where sLen is the length of the string "s" and tLen is the length of "t". michael@0: // The first row and the first column are initialized as shown, and michael@0: // the algorithm computes the remaining cells row-by-row, and michael@0: // left-to-right within each row. The computation only requires that michael@0: // we be able to see the current row and the previous one. michael@0: michael@0: // Allocate memory for two rows. Use AutoArray's to manage the memory michael@0: // so we don't have to explicitly free it, and so we can avoid the expense michael@0: // of memory allocations for relatively small strings. michael@0: AutoArray row1(sLen + 1); michael@0: AutoArray row2(sLen + 1); michael@0: michael@0: // Declare the raw pointers that will actually be used to access the memory. michael@0: int *prevRow = row1.get(); michael@0: NS_ENSURE_TRUE(prevRow, SQLITE_NOMEM); michael@0: int *currRow = row2.get(); michael@0: NS_ENSURE_TRUE(currRow, SQLITE_NOMEM); michael@0: michael@0: // Initialize the first row. michael@0: for (uint32_t i = 0; i <= sLen; i++) michael@0: prevRow[i] = i; michael@0: michael@0: const char16_t *s = aStringS.BeginReading(); michael@0: const char16_t *t = aStringT.BeginReading(); michael@0: michael@0: // Compute the empty cells in the "matrix" row-by-row, starting with michael@0: // the second row. michael@0: for (uint32_t ti = 1; ti <= tLen; ti++) { michael@0: michael@0: // Initialize the first cell in this row. michael@0: currRow[0] = ti; michael@0: michael@0: // Get the character from "t" that corresponds to this row. michael@0: const char16_t tch = t[ti - 1]; michael@0: michael@0: // Compute the remaining cells in this row, left-to-right, michael@0: // starting at the second column (and first character of "s"). michael@0: for (uint32_t si = 1; si <= sLen; si++) { michael@0: michael@0: // Get the character from "s" that corresponds to this column, michael@0: // compare it to the t-character, and compute the "cost". michael@0: const char16_t sch = s[si - 1]; michael@0: int cost = (sch == tch) ? 0 : 1; michael@0: michael@0: // ............ We want to calculate the value of cell "d" from michael@0: // ...ab....... the previously calculated (or initialized) cells michael@0: // ...cd....... "a", "b", and "c", where d = min(a', b', c'). michael@0: // ............ michael@0: int aPrime = prevRow[si - 1] + cost; michael@0: int bPrime = prevRow[si] + 1; michael@0: int cPrime = currRow[si - 1] + 1; michael@0: currRow[si] = std::min(aPrime, std::min(bPrime, cPrime)); michael@0: } michael@0: michael@0: // Advance to the next row. The current row becomes the previous michael@0: // row and we recycle the old previous row as the new current row. michael@0: // We don't need to re-initialize the new current row since we will michael@0: // rewrite all of its cells anyway. michael@0: int *oldPrevRow = prevRow; michael@0: prevRow = currRow; michael@0: currRow = oldPrevRow; michael@0: } michael@0: michael@0: // The final result is the value of the last cell in the last row. michael@0: // Note that that's now in the "previous" row, since we just swapped them. michael@0: *_result = prevRow[sLen]; michael@0: return SQLITE_OK; michael@0: } michael@0: michael@0: // This struct is used only by registerFunctions below, but ISO C++98 forbids michael@0: // instantiating a template dependent on a locally-defined type. Boo-urns! michael@0: struct Functions { michael@0: const char *zName; michael@0: int nArg; michael@0: int enc; michael@0: void *pContext; michael@0: void (*xFunc)(::sqlite3_context*, int, sqlite3_value**); michael@0: }; michael@0: michael@0: } // anonymous namespace michael@0: michael@0: //////////////////////////////////////////////////////////////////////////////// michael@0: //// Exposed Functions michael@0: michael@0: int michael@0: registerFunctions(sqlite3 *aDB) michael@0: { michael@0: Functions functions[] = { michael@0: {"lower", michael@0: 1, michael@0: SQLITE_UTF16, michael@0: 0, michael@0: caseFunction}, michael@0: {"lower", michael@0: 1, michael@0: SQLITE_UTF8, michael@0: 0, michael@0: caseFunction}, michael@0: {"upper", michael@0: 1, michael@0: SQLITE_UTF16, michael@0: (void*)1, michael@0: caseFunction}, michael@0: {"upper", michael@0: 1, michael@0: SQLITE_UTF8, michael@0: (void*)1, michael@0: caseFunction}, michael@0: michael@0: {"like", michael@0: 2, michael@0: SQLITE_UTF16, michael@0: 0, michael@0: likeFunction}, michael@0: {"like", michael@0: 2, michael@0: SQLITE_UTF8, michael@0: 0, michael@0: likeFunction}, michael@0: {"like", michael@0: 3, michael@0: SQLITE_UTF16, michael@0: 0, michael@0: likeFunction}, michael@0: {"like", michael@0: 3, michael@0: SQLITE_UTF8, michael@0: 0, michael@0: likeFunction}, michael@0: michael@0: {"levenshteinDistance", michael@0: 2, michael@0: SQLITE_UTF16, michael@0: 0, michael@0: levenshteinDistanceFunction}, michael@0: {"levenshteinDistance", michael@0: 2, michael@0: SQLITE_UTF8, michael@0: 0, michael@0: levenshteinDistanceFunction}, michael@0: }; michael@0: michael@0: int rv = SQLITE_OK; michael@0: for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) { michael@0: struct Functions *p = &functions[i]; michael@0: rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext, michael@0: p->xFunc, nullptr, nullptr); michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: //////////////////////////////////////////////////////////////////////////////// michael@0: //// SQL Functions michael@0: michael@0: void michael@0: caseFunction(sqlite3_context *aCtx, michael@0: int aArgc, michael@0: sqlite3_value **aArgv) michael@0: { michael@0: NS_ASSERTION(1 == aArgc, "Invalid number of arguments!"); michael@0: michael@0: nsAutoString data(static_cast(::sqlite3_value_text16(aArgv[0]))); michael@0: bool toUpper = ::sqlite3_user_data(aCtx) ? true : false; michael@0: michael@0: if (toUpper) michael@0: ::ToUpperCase(data); michael@0: else michael@0: ::ToLowerCase(data); michael@0: michael@0: // Set the result. michael@0: ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT); michael@0: } michael@0: michael@0: /** michael@0: * This implements the like() SQL function. This is used by the LIKE operator. michael@0: * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is michael@0: * an escape character, say E, it is implemented as 'like(B, A, E)'. michael@0: */ michael@0: void michael@0: likeFunction(sqlite3_context *aCtx, michael@0: int aArgc, michael@0: sqlite3_value **aArgv) michael@0: { michael@0: NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!"); michael@0: michael@0: if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) { michael@0: ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex", michael@0: SQLITE_TOOBIG); michael@0: return; michael@0: } michael@0: michael@0: if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1])) michael@0: return; michael@0: michael@0: nsDependentString A(static_cast(::sqlite3_value_text16(aArgv[1]))); michael@0: nsDependentString B(static_cast(::sqlite3_value_text16(aArgv[0]))); michael@0: NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!"); michael@0: michael@0: char16_t E = 0; michael@0: if (3 == aArgc) michael@0: E = static_cast(::sqlite3_value_text16(aArgv[2]))[0]; michael@0: michael@0: nsAString::const_iterator itrString, endString; michael@0: A.BeginReading(itrString); michael@0: A.EndReading(endString); michael@0: nsAString::const_iterator itrPattern, endPattern; michael@0: B.BeginReading(itrPattern); michael@0: B.EndReading(endPattern); michael@0: ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString, michael@0: endString, E)); michael@0: } michael@0: michael@0: void levenshteinDistanceFunction(sqlite3_context *aCtx, michael@0: int aArgc, michael@0: sqlite3_value **aArgv) michael@0: { michael@0: NS_ASSERTION(2 == aArgc, "Invalid number of arguments!"); michael@0: michael@0: // If either argument is a SQL NULL, then return SQL NULL. michael@0: if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL || michael@0: ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) { michael@0: ::sqlite3_result_null(aCtx); michael@0: return; michael@0: } michael@0: michael@0: int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t); michael@0: const char16_t *a = static_cast(::sqlite3_value_text16(aArgv[0])); michael@0: michael@0: int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t); michael@0: const char16_t *b = static_cast(::sqlite3_value_text16(aArgv[1])); michael@0: michael@0: // Compute the Levenshtein Distance, and return the result (or error). michael@0: int distance = -1; michael@0: const nsDependentString A(a, aLen); michael@0: const nsDependentString B(b, bLen); michael@0: int status = levenshteinDistance(A, B, &distance); michael@0: if (status == SQLITE_OK) { michael@0: ::sqlite3_result_int(aCtx, distance); michael@0: } michael@0: else if (status == SQLITE_NOMEM) { michael@0: ::sqlite3_result_error_nomem(aCtx); michael@0: } michael@0: else { michael@0: ::sqlite3_result_error(aCtx, "User function returned error code", -1); michael@0: } michael@0: } michael@0: michael@0: } // namespace storage michael@0: } // namespace mozilla