mirror of https://github.com/roytam1/UXP
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
406 lines
12 KiB
406 lines
12 KiB
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- |
|
* vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ : |
|
* This Source Code Form is subject to the terms of the Mozilla Public |
|
* License, v. 2.0. If a copy of the MPL was not distributed with this |
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
|
|
#include "mozilla/ArrayUtils.h" |
|
|
|
#include "mozStorageSQLFunctions.h" |
|
#include "nsUnicharUtils.h" |
|
#include <algorithm> |
|
|
|
namespace mozilla { |
|
namespace storage { |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//// Local Helper Functions |
|
|
|
namespace { |
|
|
|
/** |
|
* Performs the LIKE comparison of a string against a pattern. For more detail |
|
* see http://www.sqlite.org/lang_expr.html#like. |
|
* |
|
* @param aPatternItr |
|
* An iterator at the start of the pattern to check for. |
|
* @param aPatternEnd |
|
* An iterator at the end of the pattern to check for. |
|
* @param aStringItr |
|
* An iterator at the start of the string to check for the pattern. |
|
* @param aStringEnd |
|
* An iterator at the end of the string to check for the pattern. |
|
* @param aEscapeChar |
|
* The character to use for escaping symbols in the pattern. |
|
* @return 1 if the pattern is found, 0 otherwise. |
|
*/ |
|
int |
|
likeCompare(nsAString::const_iterator aPatternItr, |
|
nsAString::const_iterator aPatternEnd, |
|
nsAString::const_iterator aStringItr, |
|
nsAString::const_iterator aStringEnd, |
|
char16_t aEscapeChar) |
|
{ |
|
const char16_t MATCH_ALL('%'); |
|
const char16_t MATCH_ONE('_'); |
|
|
|
bool lastWasEscape = false; |
|
while (aPatternItr != aPatternEnd) { |
|
/** |
|
* What we do in here is take a look at each character from the input |
|
* pattern, and do something with it. There are 4 possibilities: |
|
* 1) character is an un-escaped match-all character |
|
* 2) character is an un-escaped match-one character |
|
* 3) character is an un-escaped escape character |
|
* 4) character is not any of the above |
|
*/ |
|
if (!lastWasEscape && *aPatternItr == MATCH_ALL) { |
|
// CASE 1 |
|
/** |
|
* Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a |
|
* MATCH_ALL character. For each MATCH_ONE character, skip one character |
|
* in the pattern string. |
|
*/ |
|
while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) { |
|
if (*aPatternItr == MATCH_ONE) { |
|
// If we've hit the end of the string we are testing, no match |
|
if (aStringItr == aStringEnd) |
|
return 0; |
|
aStringItr++; |
|
} |
|
aPatternItr++; |
|
} |
|
|
|
// If we've hit the end of the pattern string, match |
|
if (aPatternItr == aPatternEnd) |
|
return 1; |
|
|
|
while (aStringItr != aStringEnd) { |
|
if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd, |
|
aEscapeChar)) { |
|
// we've hit a match, so indicate this |
|
return 1; |
|
} |
|
aStringItr++; |
|
} |
|
|
|
// No match |
|
return 0; |
|
} |
|
else if (!lastWasEscape && *aPatternItr == MATCH_ONE) { |
|
// CASE 2 |
|
if (aStringItr == aStringEnd) { |
|
// If we've hit the end of the string we are testing, no match |
|
return 0; |
|
} |
|
aStringItr++; |
|
lastWasEscape = false; |
|
} |
|
else if (!lastWasEscape && *aPatternItr == aEscapeChar) { |
|
// CASE 3 |
|
lastWasEscape = true; |
|
} |
|
else { |
|
// CASE 4 |
|
if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) { |
|
// If we've hit a point where the strings don't match, there is no match |
|
return 0; |
|
} |
|
aStringItr++; |
|
lastWasEscape = false; |
|
} |
|
|
|
aPatternItr++; |
|
} |
|
|
|
return aStringItr == aStringEnd; |
|
} |
|
|
|
/** |
|
* Compute the Levenshtein Edit Distance between two strings. |
|
* |
|
* @param aStringS |
|
* a string |
|
* @param aStringT |
|
* another string |
|
* @param _result |
|
* an outparam that will receive the edit distance between the arguments |
|
* @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc. |
|
*/ |
|
int |
|
levenshteinDistance(const nsAString &aStringS, |
|
const nsAString &aStringT, |
|
int *_result) |
|
{ |
|
// Set the result to a non-sensical value in case we encounter an error. |
|
*_result = -1; |
|
|
|
const uint32_t sLen = aStringS.Length(); |
|
const uint32_t tLen = aStringT.Length(); |
|
|
|
if (sLen == 0) { |
|
*_result = tLen; |
|
return SQLITE_OK; |
|
} |
|
if (tLen == 0) { |
|
*_result = sLen; |
|
return SQLITE_OK; |
|
} |
|
|
|
// Notionally, Levenshtein Distance is computed in a matrix. If we |
|
// assume s = "span" and t = "spam", the matrix would look like this: |
|
// s --> |
|
// t s p a n |
|
// | 0 1 2 3 4 |
|
// V s 1 * * * * |
|
// p 2 * * * * |
|
// a 3 * * * * |
|
// m 4 * * * * |
|
// |
|
// Note that the row width is sLen + 1 and the column height is tLen + 1, |
|
// where sLen is the length of the string "s" and tLen is the length of "t". |
|
// The first row and the first column are initialized as shown, and |
|
// the algorithm computes the remaining cells row-by-row, and |
|
// left-to-right within each row. The computation only requires that |
|
// we be able to see the current row and the previous one. |
|
|
|
// Allocate memory for two rows. |
|
AutoTArray<int, nsAutoString::kDefaultStorageSize> row1; |
|
AutoTArray<int, nsAutoString::kDefaultStorageSize> row2; |
|
|
|
// Declare the raw pointers that will actually be used to access the memory. |
|
int *prevRow = row1.AppendElements(sLen + 1); |
|
int *currRow = row2.AppendElements(sLen + 1); |
|
|
|
// Initialize the first row. |
|
for (uint32_t i = 0; i <= sLen; i++) |
|
prevRow[i] = i; |
|
|
|
const char16_t *s = aStringS.BeginReading(); |
|
const char16_t *t = aStringT.BeginReading(); |
|
|
|
// Compute the empty cells in the "matrix" row-by-row, starting with |
|
// the second row. |
|
for (uint32_t ti = 1; ti <= tLen; ti++) { |
|
|
|
// Initialize the first cell in this row. |
|
currRow[0] = ti; |
|
|
|
// Get the character from "t" that corresponds to this row. |
|
const char16_t tch = t[ti - 1]; |
|
|
|
// Compute the remaining cells in this row, left-to-right, |
|
// starting at the second column (and first character of "s"). |
|
for (uint32_t si = 1; si <= sLen; si++) { |
|
|
|
// Get the character from "s" that corresponds to this column, |
|
// compare it to the t-character, and compute the "cost". |
|
const char16_t sch = s[si - 1]; |
|
int cost = (sch == tch) ? 0 : 1; |
|
|
|
// ............ We want to calculate the value of cell "d" from |
|
// ...ab....... the previously calculated (or initialized) cells |
|
// ...cd....... "a", "b", and "c", where d = min(a', b', c'). |
|
// ............ |
|
int aPrime = prevRow[si - 1] + cost; |
|
int bPrime = prevRow[si] + 1; |
|
int cPrime = currRow[si - 1] + 1; |
|
currRow[si] = std::min(aPrime, std::min(bPrime, cPrime)); |
|
} |
|
|
|
// Advance to the next row. The current row becomes the previous |
|
// row and we recycle the old previous row as the new current row. |
|
// We don't need to re-initialize the new current row since we will |
|
// rewrite all of its cells anyway. |
|
int *oldPrevRow = prevRow; |
|
prevRow = currRow; |
|
currRow = oldPrevRow; |
|
} |
|
|
|
// The final result is the value of the last cell in the last row. |
|
// Note that that's now in the "previous" row, since we just swapped them. |
|
*_result = prevRow[sLen]; |
|
return SQLITE_OK; |
|
} |
|
|
|
// This struct is used only by registerFunctions below, but ISO C++98 forbids |
|
// instantiating a template dependent on a locally-defined type. Boo-urns! |
|
struct Functions { |
|
const char *zName; |
|
int nArg; |
|
int enc; |
|
void *pContext; |
|
void (*xFunc)(::sqlite3_context*, int, sqlite3_value**); |
|
}; |
|
|
|
} // namespace |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//// Exposed Functions |
|
|
|
int |
|
registerFunctions(sqlite3 *aDB) |
|
{ |
|
Functions functions[] = { |
|
{"lower", |
|
1, |
|
SQLITE_UTF16, |
|
0, |
|
caseFunction}, |
|
{"lower", |
|
1, |
|
SQLITE_UTF8, |
|
0, |
|
caseFunction}, |
|
{"upper", |
|
1, |
|
SQLITE_UTF16, |
|
(void*)1, |
|
caseFunction}, |
|
{"upper", |
|
1, |
|
SQLITE_UTF8, |
|
(void*)1, |
|
caseFunction}, |
|
|
|
{"like", |
|
2, |
|
SQLITE_UTF16, |
|
0, |
|
likeFunction}, |
|
{"like", |
|
2, |
|
SQLITE_UTF8, |
|
0, |
|
likeFunction}, |
|
{"like", |
|
3, |
|
SQLITE_UTF16, |
|
0, |
|
likeFunction}, |
|
{"like", |
|
3, |
|
SQLITE_UTF8, |
|
0, |
|
likeFunction}, |
|
|
|
{"levenshteinDistance", |
|
2, |
|
SQLITE_UTF16, |
|
0, |
|
levenshteinDistanceFunction}, |
|
{"levenshteinDistance", |
|
2, |
|
SQLITE_UTF8, |
|
0, |
|
levenshteinDistanceFunction}, |
|
}; |
|
|
|
int rv = SQLITE_OK; |
|
for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) { |
|
struct Functions *p = &functions[i]; |
|
rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext, |
|
p->xFunc, nullptr, nullptr); |
|
} |
|
|
|
return rv; |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//// SQL Functions |
|
|
|
void |
|
caseFunction(sqlite3_context *aCtx, |
|
int aArgc, |
|
sqlite3_value **aArgv) |
|
{ |
|
NS_ASSERTION(1 == aArgc, "Invalid number of arguments!"); |
|
|
|
nsAutoString data(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]))); |
|
bool toUpper = ::sqlite3_user_data(aCtx) ? true : false; |
|
|
|
if (toUpper) |
|
::ToUpperCase(data); |
|
else |
|
::ToLowerCase(data); |
|
|
|
// Set the result. |
|
::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT); |
|
} |
|
|
|
/** |
|
* This implements the like() SQL function. This is used by the LIKE operator. |
|
* The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is |
|
* an escape character, say E, it is implemented as 'like(B, A, E)'. |
|
*/ |
|
void |
|
likeFunction(sqlite3_context *aCtx, |
|
int aArgc, |
|
sqlite3_value **aArgv) |
|
{ |
|
NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!"); |
|
|
|
if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) { |
|
::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex", |
|
SQLITE_TOOBIG); |
|
return; |
|
} |
|
|
|
if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1])) |
|
return; |
|
|
|
nsDependentString A(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]))); |
|
nsDependentString B(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]))); |
|
NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!"); |
|
|
|
char16_t E = 0; |
|
if (3 == aArgc) |
|
E = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[2]))[0]; |
|
|
|
nsAString::const_iterator itrString, endString; |
|
A.BeginReading(itrString); |
|
A.EndReading(endString); |
|
nsAString::const_iterator itrPattern, endPattern; |
|
B.BeginReading(itrPattern); |
|
B.EndReading(endPattern); |
|
::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString, |
|
endString, E)); |
|
} |
|
|
|
void levenshteinDistanceFunction(sqlite3_context *aCtx, |
|
int aArgc, |
|
sqlite3_value **aArgv) |
|
{ |
|
NS_ASSERTION(2 == aArgc, "Invalid number of arguments!"); |
|
|
|
// If either argument is a SQL NULL, then return SQL NULL. |
|
if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL || |
|
::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) { |
|
::sqlite3_result_null(aCtx); |
|
return; |
|
} |
|
|
|
int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t); |
|
const char16_t *a = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])); |
|
|
|
int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t); |
|
const char16_t *b = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1])); |
|
|
|
// Compute the Levenshtein Distance, and return the result (or error). |
|
int distance = -1; |
|
const nsDependentString A(a, aLen); |
|
const nsDependentString B(b, bLen); |
|
int status = levenshteinDistance(A, B, &distance); |
|
if (status == SQLITE_OK) { |
|
::sqlite3_result_int(aCtx, distance); |
|
} |
|
else if (status == SQLITE_NOMEM) { |
|
::sqlite3_result_error_nomem(aCtx); |
|
} |
|
else { |
|
::sqlite3_result_error(aCtx, "User function returned error code", -1); |
|
} |
|
} |
|
|
|
} // namespace storage |
|
} // namespace mozilla
|
|
|