storage/src/mozStorageSQLFunctions.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
     2  * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
     3  * This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #include "mozilla/ArrayUtils.h"
     9 #include "mozStorageSQLFunctions.h"
    10 #include "nsUnicharUtils.h"
    11 #include <algorithm>
    13 namespace mozilla {
    14 namespace storage {
    16 ////////////////////////////////////////////////////////////////////////////////
    17 //// Local Helper Functions
    19 namespace {
    21 /**
    22  * Performs the LIKE comparison of a string against a pattern.  For more detail
    23  * see http://www.sqlite.org/lang_expr.html#like.
    24  *
    25  * @param aPatternItr
    26  *        An iterator at the start of the pattern to check for.
    27  * @param aPatternEnd
    28  *        An iterator at the end of the pattern to check for.
    29  * @param aStringItr
    30  *        An iterator at the start of the string to check for the pattern.
    31  * @param aStringEnd
    32  *        An iterator at the end of the string to check for the pattern.
    33  * @param aEscapeChar
    34  *        The character to use for escaping symbols in the pattern.
    35  * @return 1 if the pattern is found, 0 otherwise.
    36  */
    37 int
    38 likeCompare(nsAString::const_iterator aPatternItr,
    39             nsAString::const_iterator aPatternEnd,
    40             nsAString::const_iterator aStringItr,
    41             nsAString::const_iterator aStringEnd,
    42             char16_t aEscapeChar)
    43 {
    44   const char16_t MATCH_ALL('%');
    45   const char16_t MATCH_ONE('_');
    47   bool lastWasEscape = false;
    48   while (aPatternItr != aPatternEnd) {
    49     /**
    50      * What we do in here is take a look at each character from the input
    51      * pattern, and do something with it.  There are 4 possibilities:
    52      * 1) character is an un-escaped match-all character
    53      * 2) character is an un-escaped match-one character
    54      * 3) character is an un-escaped escape character
    55      * 4) character is not any of the above
    56      */
    57     if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
    58       // CASE 1
    59       /**
    60        * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
    61        * MATCH_ALL character.  For each MATCH_ONE character, skip one character
    62        * in the pattern string.
    63        */
    64       while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) {
    65         if (*aPatternItr == MATCH_ONE) {
    66           // If we've hit the end of the string we are testing, no match
    67           if (aStringItr == aStringEnd)
    68             return 0;
    69           aStringItr++;
    70         }
    71         aPatternItr++;
    72       }
    74       // If we've hit the end of the pattern string, match
    75       if (aPatternItr == aPatternEnd)
    76         return 1;
    78       while (aStringItr != aStringEnd) {
    79         if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
    80                         aEscapeChar)) {
    81           // we've hit a match, so indicate this
    82           return 1;
    83         }
    84         aStringItr++;
    85       }
    87       // No match
    88       return 0;
    89     }
    90     else if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
    91       // CASE 2
    92       if (aStringItr == aStringEnd) {
    93         // If we've hit the end of the string we are testing, no match
    94         return 0;
    95       }
    96       aStringItr++;
    97       lastWasEscape = false;
    98     }
    99     else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
   100       // CASE 3
   101       lastWasEscape = true;
   102     }
   103     else {
   104       // CASE 4
   105       if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
   106         // If we've hit a point where the strings don't match, there is no match
   107         return 0;
   108       }
   109       aStringItr++;
   110       lastWasEscape = false;
   111     }
   113     aPatternItr++;
   114   }
   116   return aStringItr == aStringEnd;
   117 }
   119 /**
   120  * This class manages a dynamic array.  It can represent an array of any 
   121  * reasonable size, but if the array is "N" elements or smaller, it will be
   122  * stored using fixed space inside the auto array itself.  If the auto array
   123  * is a local variable, this internal storage will be allocated cheaply on the
   124  * stack, similar to nsAutoString.  If a larger size is requested, the memory
   125  * will be dynamically allocated from the heap.  Since the destructor will
   126  * free any heap-allocated memory, client code doesn't need to care where the
   127  * memory came from.
   128  */
   129 template <class T, size_t N> class AutoArray
   130 {
   132 public:
   134   AutoArray(size_t size)
   135   : mBuffer(size <= N ? mAutoBuffer : new T[size])
   136   {
   137   }
   139   ~AutoArray()
   140   { 
   141     if (mBuffer != mAutoBuffer)
   142       delete[] mBuffer; 
   143   }
   145   /**
   146    * Return the pointer to the allocated array.
   147    * @note If the array allocation failed, get() will return nullptr!
   148    *
   149    * @return the pointer to the allocated array
   150    */
   151   T *get() 
   152   {
   153     return mBuffer; 
   154   }
   156 private:
   157   T *mBuffer;           // Points to mAutoBuffer if we can use it, heap otherwise.
   158   T mAutoBuffer[N];     // The internal memory buffer that we use if we can.
   159 };
   161 /**
   162  * Compute the Levenshtein Edit Distance between two strings.
   163  * 
   164  * @param aStringS
   165  *        a string
   166  * @param aStringT
   167  *        another string
   168  * @param _result
   169  *        an outparam that will receive the edit distance between the arguments
   170  * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
   171  */
   172 int
   173 levenshteinDistance(const nsAString &aStringS,
   174                     const nsAString &aStringT,
   175                     int *_result)
   176 {
   177     // Set the result to a non-sensical value in case we encounter an error.
   178     *_result = -1;
   180     const uint32_t sLen = aStringS.Length();
   181     const uint32_t tLen = aStringT.Length();
   183     if (sLen == 0) {
   184       *_result = tLen;
   185       return SQLITE_OK;
   186     }
   187     if (tLen == 0) {
   188       *_result = sLen;
   189       return SQLITE_OK;
   190     }
   192     // Notionally, Levenshtein Distance is computed in a matrix.  If we 
   193     // assume s = "span" and t = "spam", the matrix would look like this:
   194     //    s -->
   195     //  t          s   p   a   n
   196     //  |      0   1   2   3   4
   197     //  V  s   1   *   *   *   *
   198     //     p   2   *   *   *   *
   199     //     a   3   *   *   *   *
   200     //     m   4   *   *   *   *
   201     //
   202     // Note that the row width is sLen + 1 and the column height is tLen + 1,
   203     // where sLen is the length of the string "s" and tLen is the length of "t".
   204     // The first row and the first column are initialized as shown, and
   205     // the algorithm computes the remaining cells row-by-row, and
   206     // left-to-right within each row.  The computation only requires that
   207     // we be able to see the current row and the previous one.
   209     // Allocate memory for two rows.  Use AutoArray's to manage the memory
   210     // so we don't have to explicitly free it, and so we can avoid the expense
   211     // of memory allocations for relatively small strings.
   212     AutoArray<int, nsAutoString::kDefaultStorageSize> row1(sLen + 1);
   213     AutoArray<int, nsAutoString::kDefaultStorageSize> row2(sLen + 1);
   215     // Declare the raw pointers that will actually be used to access the memory.
   216     int *prevRow = row1.get();
   217     NS_ENSURE_TRUE(prevRow, SQLITE_NOMEM);
   218     int *currRow = row2.get();
   219     NS_ENSURE_TRUE(currRow, SQLITE_NOMEM);
   221     // Initialize the first row.
   222     for (uint32_t i = 0; i <= sLen; i++)
   223         prevRow[i] = i;
   225     const char16_t *s = aStringS.BeginReading();
   226     const char16_t *t = aStringT.BeginReading();
   228     // Compute the empty cells in the "matrix" row-by-row, starting with
   229     // the second row.
   230     for (uint32_t ti = 1; ti <= tLen; ti++) {
   232         // Initialize the first cell in this row.
   233         currRow[0] = ti;
   235         // Get the character from "t" that corresponds to this row.
   236         const char16_t tch = t[ti - 1];
   238         // Compute the remaining cells in this row, left-to-right,
   239         // starting at the second column (and first character of "s").
   240         for (uint32_t si = 1; si <= sLen; si++) {
   242             // Get the character from "s" that corresponds to this column,
   243             // compare it to the t-character, and compute the "cost".
   244             const char16_t sch = s[si - 1];
   245             int cost = (sch == tch) ? 0 : 1;
   247             // ............ We want to calculate the value of cell "d" from
   248             // ...ab....... the previously calculated (or initialized) cells
   249             // ...cd....... "a", "b", and "c", where d = min(a', b', c').
   250             // ............ 
   251             int aPrime = prevRow[si - 1] + cost;
   252             int bPrime = prevRow[si] + 1;
   253             int cPrime = currRow[si - 1] + 1;
   254             currRow[si] = std::min(aPrime, std::min(bPrime, cPrime));
   255         }
   257         // Advance to the next row.  The current row becomes the previous
   258         // row and we recycle the old previous row as the new current row.
   259         // We don't need to re-initialize the new current row since we will
   260         // rewrite all of its cells anyway.
   261         int *oldPrevRow = prevRow;
   262         prevRow = currRow;
   263         currRow = oldPrevRow;
   264     }
   266     // The final result is the value of the last cell in the last row.
   267     // Note that that's now in the "previous" row, since we just swapped them.
   268     *_result = prevRow[sLen];
   269     return SQLITE_OK;
   270 }
   272 // This struct is used only by registerFunctions below, but ISO C++98 forbids
   273 // instantiating a template dependent on a locally-defined type.  Boo-urns!
   274 struct Functions {
   275   const char *zName;
   276   int nArg;
   277   int enc;
   278   void *pContext;
   279   void (*xFunc)(::sqlite3_context*, int, sqlite3_value**);
   280 };
   282 } // anonymous namespace
   284 ////////////////////////////////////////////////////////////////////////////////
   285 //// Exposed Functions
   287 int
   288 registerFunctions(sqlite3 *aDB)
   289 {
   290   Functions functions[] = {
   291     {"lower",               
   292       1, 
   293       SQLITE_UTF16, 
   294       0,        
   295       caseFunction},
   296     {"lower",               
   297       1, 
   298       SQLITE_UTF8,  
   299       0,        
   300       caseFunction},
   301     {"upper",               
   302       1, 
   303       SQLITE_UTF16, 
   304       (void*)1, 
   305       caseFunction},
   306     {"upper",               
   307       1, 
   308       SQLITE_UTF8,  
   309       (void*)1, 
   310       caseFunction},
   312     {"like",                
   313       2, 
   314       SQLITE_UTF16, 
   315       0,        
   316       likeFunction},
   317     {"like",                
   318       2, 
   319       SQLITE_UTF8,  
   320       0,        
   321       likeFunction},
   322     {"like",                
   323       3, 
   324       SQLITE_UTF16, 
   325       0,        
   326       likeFunction},
   327     {"like",                
   328       3, 
   329       SQLITE_UTF8,  
   330       0,        
   331       likeFunction},
   333     {"levenshteinDistance", 
   334       2, 
   335       SQLITE_UTF16, 
   336       0,        
   337       levenshteinDistanceFunction},
   338     {"levenshteinDistance", 
   339       2, 
   340       SQLITE_UTF8,  
   341       0,        
   342       levenshteinDistanceFunction},
   343   };
   345   int rv = SQLITE_OK;
   346   for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) {
   347     struct Functions *p = &functions[i];
   348     rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext,
   349                                    p->xFunc, nullptr, nullptr);
   350   }
   352   return rv;
   353 }
   355 ////////////////////////////////////////////////////////////////////////////////
   356 //// SQL Functions
   358 void
   359 caseFunction(sqlite3_context *aCtx,
   360              int aArgc,
   361              sqlite3_value **aArgv)
   362 {
   363   NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
   365   nsAutoString data(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])));
   366   bool toUpper = ::sqlite3_user_data(aCtx) ? true : false;
   368   if (toUpper)
   369     ::ToUpperCase(data);
   370   else
   371     ::ToLowerCase(data);
   373   // Set the result.
   374   ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT);
   375 }
   377 /**
   378  * This implements the like() SQL function.  This is used by the LIKE operator.
   379  * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
   380  * an escape character, say E, it is implemented as 'like(B, A, E)'.
   381  */
   382 void
   383 likeFunction(sqlite3_context *aCtx,
   384              int aArgc,
   385              sqlite3_value **aArgv)
   386 {
   387   NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");
   389   if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) {
   390     ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex",
   391                            SQLITE_TOOBIG);
   392     return;
   393   }
   395   if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1]))
   396     return;
   398   nsDependentString A(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1])));
   399   nsDependentString B(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])));
   400   NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");
   402   char16_t E = 0;
   403   if (3 == aArgc)
   404     E = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[2]))[0];
   406   nsAString::const_iterator itrString, endString;
   407   A.BeginReading(itrString);
   408   A.EndReading(endString);
   409   nsAString::const_iterator itrPattern, endPattern;
   410   B.BeginReading(itrPattern);
   411   B.EndReading(endPattern);
   412   ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString,
   413                                          endString, E));
   414 }
   416 void levenshteinDistanceFunction(sqlite3_context *aCtx,
   417                                  int aArgc,
   418                                  sqlite3_value **aArgv)
   419 {
   420   NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");
   422   // If either argument is a SQL NULL, then return SQL NULL.
   423   if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
   424       ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
   425     ::sqlite3_result_null(aCtx);
   426     return;
   427   }
   429   int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
   430   const char16_t *a = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]));
   432   int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
   433   const char16_t *b = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]));
   435   // Compute the Levenshtein Distance, and return the result (or error).
   436   int distance = -1;
   437   const nsDependentString A(a, aLen);
   438   const nsDependentString B(b, bLen);
   439   int status = levenshteinDistance(A, B, &distance);
   440   if (status == SQLITE_OK) {
   441     ::sqlite3_result_int(aCtx, distance);    
   442   }
   443   else if (status == SQLITE_NOMEM) {
   444     ::sqlite3_result_error_nomem(aCtx);
   445   }
   446   else {
   447     ::sqlite3_result_error(aCtx, "User function returned error code", -1);
   448   }
   449 }
   451 } // namespace storage
   452 } // namespace mozilla

mercurial