storage/src/mozStorageSQLFunctions.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/storage/src/mozStorageSQLFunctions.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,452 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
     1.5 + * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
     1.6 + * This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#include "mozilla/ArrayUtils.h"
    1.11 +
    1.12 +#include "mozStorageSQLFunctions.h"
    1.13 +#include "nsUnicharUtils.h"
    1.14 +#include <algorithm>
    1.15 +
    1.16 +namespace mozilla {
    1.17 +namespace storage {
    1.18 +
    1.19 +////////////////////////////////////////////////////////////////////////////////
    1.20 +//// Local Helper Functions
    1.21 +
    1.22 +namespace {
    1.23 +
    1.24 +/**
    1.25 + * Performs the LIKE comparison of a string against a pattern.  For more detail
    1.26 + * see http://www.sqlite.org/lang_expr.html#like.
    1.27 + *
    1.28 + * @param aPatternItr
    1.29 + *        An iterator at the start of the pattern to check for.
    1.30 + * @param aPatternEnd
    1.31 + *        An iterator at the end of the pattern to check for.
    1.32 + * @param aStringItr
    1.33 + *        An iterator at the start of the string to check for the pattern.
    1.34 + * @param aStringEnd
    1.35 + *        An iterator at the end of the string to check for the pattern.
    1.36 + * @param aEscapeChar
    1.37 + *        The character to use for escaping symbols in the pattern.
    1.38 + * @return 1 if the pattern is found, 0 otherwise.
    1.39 + */
    1.40 +int
    1.41 +likeCompare(nsAString::const_iterator aPatternItr,
    1.42 +            nsAString::const_iterator aPatternEnd,
    1.43 +            nsAString::const_iterator aStringItr,
    1.44 +            nsAString::const_iterator aStringEnd,
    1.45 +            char16_t aEscapeChar)
    1.46 +{
    1.47 +  const char16_t MATCH_ALL('%');
    1.48 +  const char16_t MATCH_ONE('_');
    1.49 +
    1.50 +  bool lastWasEscape = false;
    1.51 +  while (aPatternItr != aPatternEnd) {
    1.52 +    /**
    1.53 +     * What we do in here is take a look at each character from the input
    1.54 +     * pattern, and do something with it.  There are 4 possibilities:
    1.55 +     * 1) character is an un-escaped match-all character
    1.56 +     * 2) character is an un-escaped match-one character
    1.57 +     * 3) character is an un-escaped escape character
    1.58 +     * 4) character is not any of the above
    1.59 +     */
    1.60 +    if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
    1.61 +      // CASE 1
    1.62 +      /**
    1.63 +       * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
    1.64 +       * MATCH_ALL character.  For each MATCH_ONE character, skip one character
    1.65 +       * in the pattern string.
    1.66 +       */
    1.67 +      while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) {
    1.68 +        if (*aPatternItr == MATCH_ONE) {
    1.69 +          // If we've hit the end of the string we are testing, no match
    1.70 +          if (aStringItr == aStringEnd)
    1.71 +            return 0;
    1.72 +          aStringItr++;
    1.73 +        }
    1.74 +        aPatternItr++;
    1.75 +      }
    1.76 +
    1.77 +      // If we've hit the end of the pattern string, match
    1.78 +      if (aPatternItr == aPatternEnd)
    1.79 +        return 1;
    1.80 +
    1.81 +      while (aStringItr != aStringEnd) {
    1.82 +        if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
    1.83 +                        aEscapeChar)) {
    1.84 +          // we've hit a match, so indicate this
    1.85 +          return 1;
    1.86 +        }
    1.87 +        aStringItr++;
    1.88 +      }
    1.89 +
    1.90 +      // No match
    1.91 +      return 0;
    1.92 +    }
    1.93 +    else if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
    1.94 +      // CASE 2
    1.95 +      if (aStringItr == aStringEnd) {
    1.96 +        // If we've hit the end of the string we are testing, no match
    1.97 +        return 0;
    1.98 +      }
    1.99 +      aStringItr++;
   1.100 +      lastWasEscape = false;
   1.101 +    }
   1.102 +    else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
   1.103 +      // CASE 3
   1.104 +      lastWasEscape = true;
   1.105 +    }
   1.106 +    else {
   1.107 +      // CASE 4
   1.108 +      if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
   1.109 +        // If we've hit a point where the strings don't match, there is no match
   1.110 +        return 0;
   1.111 +      }
   1.112 +      aStringItr++;
   1.113 +      lastWasEscape = false;
   1.114 +    }
   1.115 +
   1.116 +    aPatternItr++;
   1.117 +  }
   1.118 +
   1.119 +  return aStringItr == aStringEnd;
   1.120 +}
   1.121 +
   1.122 +/**
   1.123 + * This class manages a dynamic array.  It can represent an array of any 
   1.124 + * reasonable size, but if the array is "N" elements or smaller, it will be
   1.125 + * stored using fixed space inside the auto array itself.  If the auto array
   1.126 + * is a local variable, this internal storage will be allocated cheaply on the
   1.127 + * stack, similar to nsAutoString.  If a larger size is requested, the memory
   1.128 + * will be dynamically allocated from the heap.  Since the destructor will
   1.129 + * free any heap-allocated memory, client code doesn't need to care where the
   1.130 + * memory came from.
   1.131 + */
   1.132 +template <class T, size_t N> class AutoArray
   1.133 +{
   1.134 +
   1.135 +public:
   1.136 +
   1.137 +  AutoArray(size_t size)
   1.138 +  : mBuffer(size <= N ? mAutoBuffer : new T[size])
   1.139 +  {
   1.140 +  }
   1.141 +
   1.142 +  ~AutoArray()
   1.143 +  { 
   1.144 +    if (mBuffer != mAutoBuffer)
   1.145 +      delete[] mBuffer; 
   1.146 +  }
   1.147 +
   1.148 +  /**
   1.149 +   * Return the pointer to the allocated array.
   1.150 +   * @note If the array allocation failed, get() will return nullptr!
   1.151 +   *
   1.152 +   * @return the pointer to the allocated array
   1.153 +   */
   1.154 +  T *get() 
   1.155 +  {
   1.156 +    return mBuffer; 
   1.157 +  }
   1.158 +
   1.159 +private:
   1.160 +  T *mBuffer;           // Points to mAutoBuffer if we can use it, heap otherwise.
   1.161 +  T mAutoBuffer[N];     // The internal memory buffer that we use if we can.
   1.162 +};
   1.163 +
   1.164 +/**
   1.165 + * Compute the Levenshtein Edit Distance between two strings.
   1.166 + * 
   1.167 + * @param aStringS
   1.168 + *        a string
   1.169 + * @param aStringT
   1.170 + *        another string
   1.171 + * @param _result
   1.172 + *        an outparam that will receive the edit distance between the arguments
   1.173 + * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
   1.174 + */
   1.175 +int
   1.176 +levenshteinDistance(const nsAString &aStringS,
   1.177 +                    const nsAString &aStringT,
   1.178 +                    int *_result)
   1.179 +{
   1.180 +    // Set the result to a non-sensical value in case we encounter an error.
   1.181 +    *_result = -1;
   1.182 +
   1.183 +    const uint32_t sLen = aStringS.Length();
   1.184 +    const uint32_t tLen = aStringT.Length();
   1.185 +
   1.186 +    if (sLen == 0) {
   1.187 +      *_result = tLen;
   1.188 +      return SQLITE_OK;
   1.189 +    }
   1.190 +    if (tLen == 0) {
   1.191 +      *_result = sLen;
   1.192 +      return SQLITE_OK;
   1.193 +    }
   1.194 +
   1.195 +    // Notionally, Levenshtein Distance is computed in a matrix.  If we 
   1.196 +    // assume s = "span" and t = "spam", the matrix would look like this:
   1.197 +    //    s -->
   1.198 +    //  t          s   p   a   n
   1.199 +    //  |      0   1   2   3   4
   1.200 +    //  V  s   1   *   *   *   *
   1.201 +    //     p   2   *   *   *   *
   1.202 +    //     a   3   *   *   *   *
   1.203 +    //     m   4   *   *   *   *
   1.204 +    //
   1.205 +    // Note that the row width is sLen + 1 and the column height is tLen + 1,
   1.206 +    // where sLen is the length of the string "s" and tLen is the length of "t".
   1.207 +    // The first row and the first column are initialized as shown, and
   1.208 +    // the algorithm computes the remaining cells row-by-row, and
   1.209 +    // left-to-right within each row.  The computation only requires that
   1.210 +    // we be able to see the current row and the previous one.
   1.211 +
   1.212 +    // Allocate memory for two rows.  Use AutoArray's to manage the memory
   1.213 +    // so we don't have to explicitly free it, and so we can avoid the expense
   1.214 +    // of memory allocations for relatively small strings.
   1.215 +    AutoArray<int, nsAutoString::kDefaultStorageSize> row1(sLen + 1);
   1.216 +    AutoArray<int, nsAutoString::kDefaultStorageSize> row2(sLen + 1);
   1.217 +
   1.218 +    // Declare the raw pointers that will actually be used to access the memory.
   1.219 +    int *prevRow = row1.get();
   1.220 +    NS_ENSURE_TRUE(prevRow, SQLITE_NOMEM);
   1.221 +    int *currRow = row2.get();
   1.222 +    NS_ENSURE_TRUE(currRow, SQLITE_NOMEM);
   1.223 +
   1.224 +    // Initialize the first row.
   1.225 +    for (uint32_t i = 0; i <= sLen; i++)
   1.226 +        prevRow[i] = i;
   1.227 +
   1.228 +    const char16_t *s = aStringS.BeginReading();
   1.229 +    const char16_t *t = aStringT.BeginReading();
   1.230 +
   1.231 +    // Compute the empty cells in the "matrix" row-by-row, starting with
   1.232 +    // the second row.
   1.233 +    for (uint32_t ti = 1; ti <= tLen; ti++) {
   1.234 +
   1.235 +        // Initialize the first cell in this row.
   1.236 +        currRow[0] = ti;
   1.237 +
   1.238 +        // Get the character from "t" that corresponds to this row.
   1.239 +        const char16_t tch = t[ti - 1];
   1.240 +
   1.241 +        // Compute the remaining cells in this row, left-to-right,
   1.242 +        // starting at the second column (and first character of "s").
   1.243 +        for (uint32_t si = 1; si <= sLen; si++) {
   1.244 +            
   1.245 +            // Get the character from "s" that corresponds to this column,
   1.246 +            // compare it to the t-character, and compute the "cost".
   1.247 +            const char16_t sch = s[si - 1];
   1.248 +            int cost = (sch == tch) ? 0 : 1;
   1.249 +
   1.250 +            // ............ We want to calculate the value of cell "d" from
   1.251 +            // ...ab....... the previously calculated (or initialized) cells
   1.252 +            // ...cd....... "a", "b", and "c", where d = min(a', b', c').
   1.253 +            // ............ 
   1.254 +            int aPrime = prevRow[si - 1] + cost;
   1.255 +            int bPrime = prevRow[si] + 1;
   1.256 +            int cPrime = currRow[si - 1] + 1;
   1.257 +            currRow[si] = std::min(aPrime, std::min(bPrime, cPrime));
   1.258 +        }
   1.259 +
   1.260 +        // Advance to the next row.  The current row becomes the previous
   1.261 +        // row and we recycle the old previous row as the new current row.
   1.262 +        // We don't need to re-initialize the new current row since we will
   1.263 +        // rewrite all of its cells anyway.
   1.264 +        int *oldPrevRow = prevRow;
   1.265 +        prevRow = currRow;
   1.266 +        currRow = oldPrevRow;
   1.267 +    }
   1.268 +
   1.269 +    // The final result is the value of the last cell in the last row.
   1.270 +    // Note that that's now in the "previous" row, since we just swapped them.
   1.271 +    *_result = prevRow[sLen];
   1.272 +    return SQLITE_OK;
   1.273 +}
   1.274 +
   1.275 +// This struct is used only by registerFunctions below, but ISO C++98 forbids
   1.276 +// instantiating a template dependent on a locally-defined type.  Boo-urns!
   1.277 +struct Functions {
   1.278 +  const char *zName;
   1.279 +  int nArg;
   1.280 +  int enc;
   1.281 +  void *pContext;
   1.282 +  void (*xFunc)(::sqlite3_context*, int, sqlite3_value**);
   1.283 +};
   1.284 +
   1.285 +} // anonymous namespace
   1.286 +
   1.287 +////////////////////////////////////////////////////////////////////////////////
   1.288 +//// Exposed Functions
   1.289 +
   1.290 +int
   1.291 +registerFunctions(sqlite3 *aDB)
   1.292 +{
   1.293 +  Functions functions[] = {
   1.294 +    {"lower",               
   1.295 +      1, 
   1.296 +      SQLITE_UTF16, 
   1.297 +      0,        
   1.298 +      caseFunction},
   1.299 +    {"lower",               
   1.300 +      1, 
   1.301 +      SQLITE_UTF8,  
   1.302 +      0,        
   1.303 +      caseFunction},
   1.304 +    {"upper",               
   1.305 +      1, 
   1.306 +      SQLITE_UTF16, 
   1.307 +      (void*)1, 
   1.308 +      caseFunction},
   1.309 +    {"upper",               
   1.310 +      1, 
   1.311 +      SQLITE_UTF8,  
   1.312 +      (void*)1, 
   1.313 +      caseFunction},
   1.314 +
   1.315 +    {"like",                
   1.316 +      2, 
   1.317 +      SQLITE_UTF16, 
   1.318 +      0,        
   1.319 +      likeFunction},
   1.320 +    {"like",                
   1.321 +      2, 
   1.322 +      SQLITE_UTF8,  
   1.323 +      0,        
   1.324 +      likeFunction},
   1.325 +    {"like",                
   1.326 +      3, 
   1.327 +      SQLITE_UTF16, 
   1.328 +      0,        
   1.329 +      likeFunction},
   1.330 +    {"like",                
   1.331 +      3, 
   1.332 +      SQLITE_UTF8,  
   1.333 +      0,        
   1.334 +      likeFunction},
   1.335 +
   1.336 +    {"levenshteinDistance", 
   1.337 +      2, 
   1.338 +      SQLITE_UTF16, 
   1.339 +      0,        
   1.340 +      levenshteinDistanceFunction},
   1.341 +    {"levenshteinDistance", 
   1.342 +      2, 
   1.343 +      SQLITE_UTF8,  
   1.344 +      0,        
   1.345 +      levenshteinDistanceFunction},
   1.346 +  };
   1.347 +
   1.348 +  int rv = SQLITE_OK;
   1.349 +  for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) {
   1.350 +    struct Functions *p = &functions[i];
   1.351 +    rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext,
   1.352 +                                   p->xFunc, nullptr, nullptr);
   1.353 +  }
   1.354 +
   1.355 +  return rv;
   1.356 +}
   1.357 +
   1.358 +////////////////////////////////////////////////////////////////////////////////
   1.359 +//// SQL Functions
   1.360 +
   1.361 +void
   1.362 +caseFunction(sqlite3_context *aCtx,
   1.363 +             int aArgc,
   1.364 +             sqlite3_value **aArgv)
   1.365 +{
   1.366 +  NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
   1.367 +
   1.368 +  nsAutoString data(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])));
   1.369 +  bool toUpper = ::sqlite3_user_data(aCtx) ? true : false;
   1.370 +
   1.371 +  if (toUpper)
   1.372 +    ::ToUpperCase(data);
   1.373 +  else
   1.374 +    ::ToLowerCase(data);
   1.375 +
   1.376 +  // Set the result.
   1.377 +  ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT);
   1.378 +}
   1.379 +
   1.380 +/**
   1.381 + * This implements the like() SQL function.  This is used by the LIKE operator.
   1.382 + * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
   1.383 + * an escape character, say E, it is implemented as 'like(B, A, E)'.
   1.384 + */
   1.385 +void
   1.386 +likeFunction(sqlite3_context *aCtx,
   1.387 +             int aArgc,
   1.388 +             sqlite3_value **aArgv)
   1.389 +{
   1.390 +  NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");
   1.391 +
   1.392 +  if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) {
   1.393 +    ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex",
   1.394 +                           SQLITE_TOOBIG);
   1.395 +    return;
   1.396 +  }
   1.397 +
   1.398 +  if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1]))
   1.399 +    return;
   1.400 +
   1.401 +  nsDependentString A(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1])));
   1.402 +  nsDependentString B(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])));
   1.403 +  NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");
   1.404 +
   1.405 +  char16_t E = 0;
   1.406 +  if (3 == aArgc)
   1.407 +    E = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[2]))[0];
   1.408 +
   1.409 +  nsAString::const_iterator itrString, endString;
   1.410 +  A.BeginReading(itrString);
   1.411 +  A.EndReading(endString);
   1.412 +  nsAString::const_iterator itrPattern, endPattern;
   1.413 +  B.BeginReading(itrPattern);
   1.414 +  B.EndReading(endPattern);
   1.415 +  ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString,
   1.416 +                                         endString, E));
   1.417 +}
   1.418 +
   1.419 +void levenshteinDistanceFunction(sqlite3_context *aCtx,
   1.420 +                                 int aArgc,
   1.421 +                                 sqlite3_value **aArgv)
   1.422 +{
   1.423 +  NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");
   1.424 +
   1.425 +  // If either argument is a SQL NULL, then return SQL NULL.
   1.426 +  if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
   1.427 +      ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
   1.428 +    ::sqlite3_result_null(aCtx);
   1.429 +    return;
   1.430 +  }
   1.431 +
   1.432 +  int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
   1.433 +  const char16_t *a = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]));
   1.434 +
   1.435 +  int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
   1.436 +  const char16_t *b = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]));
   1.437 +
   1.438 +  // Compute the Levenshtein Distance, and return the result (or error).
   1.439 +  int distance = -1;
   1.440 +  const nsDependentString A(a, aLen);
   1.441 +  const nsDependentString B(b, bLen);
   1.442 +  int status = levenshteinDistance(A, B, &distance);
   1.443 +  if (status == SQLITE_OK) {
   1.444 +    ::sqlite3_result_int(aCtx, distance);    
   1.445 +  }
   1.446 +  else if (status == SQLITE_NOMEM) {
   1.447 +    ::sqlite3_result_error_nomem(aCtx);
   1.448 +  }
   1.449 +  else {
   1.450 +    ::sqlite3_result_error(aCtx, "User function returned error code", -1);
   1.451 +  }
   1.452 +}
   1.453 +
   1.454 +} // namespace storage
   1.455 +} // namespace mozilla

mercurial