michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set ts=2 et sw=2 tw=80: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "mozilla/FloatingPoint.h" michael@0: michael@0: #include "Key.h" michael@0: #include "ReportInternalError.h" michael@0: michael@0: #include "jsfriendapi.h" michael@0: #include "nsAlgorithm.h" michael@0: #include "nsJSUtils.h" michael@0: #include "xpcpublic.h" michael@0: #include "mozilla/Endian.h" michael@0: #include michael@0: michael@0: USING_INDEXEDDB_NAMESPACE michael@0: michael@0: /* michael@0: Here's how we encode keys: michael@0: michael@0: Basic strategy is the following michael@0: michael@0: Numbers: 1 n n n n n n n n ("n"s are encoded 64bit float) michael@0: Dates: 2 n n n n n n n n ("n"s are encoded 64bit float) michael@0: Strings: 3 s s s ... 0 ("s"s are encoded unicode bytes) michael@0: Arrays: 4 i i i ... 0 ("i"s are encoded array items) michael@0: michael@0: michael@0: When encoding floats, 64bit IEEE 754 are almost sortable, except that michael@0: positive sort lower than negative, and negative sort descending. So we use michael@0: the following encoding: michael@0: michael@0: value < 0 ? michael@0: (-to64bitInt(value)) : michael@0: (to64bitInt(value) | 0x8000000000000000) michael@0: michael@0: michael@0: When encoding strings, we use variable-size encoding per the following table michael@0: michael@0: Chars 0 - 7E are encoded as 0xxxxxxx with 1 added michael@0: Chars 7F - (3FFF+7F) are encoded as 10xxxxxx xxxxxxxx with 7F subtracted michael@0: Chars (3FFF+80) - FFFF are encoded as 11xxxxxx xxxxxxxx xx000000 michael@0: michael@0: This ensures that the first byte is never encoded as 0, which means that the michael@0: string terminator (per basic-stategy table) sorts before any character. michael@0: The reason that (3FFF+80) - FFFF is encoded "shifted up" 6 bits is to maximize michael@0: the chance that the last character is 0. See below for why. michael@0: michael@0: michael@0: When encoding Arrays, we use an additional trick. Rather than adding a byte michael@0: containing the value '4' to indicate type, we instead add 4 to the next byte. michael@0: This is usually the byte containing the type of the first item in the array. michael@0: So simple examples are michael@0: michael@0: ["foo"] 7 s s s 0 0 // 7 is 3 + 4 michael@0: [1, 2] 5 n n n n n n n n 1 n n n n n n n n 0 // 5 is 1 + 4 michael@0: michael@0: Whe do this iteratively if the first item in the array is also an array michael@0: michael@0: [["foo"]] 11 s s s 0 0 0 michael@0: michael@0: However, to avoid overflow in the byte, we only do this 3 times. If the first michael@0: item in an array is an array, and that array also has an array as first item, michael@0: we simply write out the total value accumulated so far and then follow the michael@0: "normal" rules. michael@0: michael@0: [[["foo"]]] 12 3 s s s 0 0 0 0 michael@0: michael@0: There is another edge case that can happen though, which is that the array michael@0: doesn't have a first item to which we can add 4 to the type. Instead the michael@0: next byte would normally be the array terminator (per basic-strategy table) michael@0: so we simply add the 4 there. michael@0: michael@0: [[]] 8 0 // 8 is 4 + 4 + 0 michael@0: [] 4 // 4 is 4 + 0 michael@0: [[], "foo"] 8 3 s s s 0 0 // 8 is 4 + 4 + 0 michael@0: michael@0: Note that the max-3-times rule kicks in before we get a chance to add to the michael@0: array terminator michael@0: michael@0: [[[]]] 12 0 0 0 // 12 is 4 + 4 + 4 michael@0: michael@0: We could use a much higher number than 3 at no complexity or performance cost, michael@0: however it seems unlikely that it'll make a practical difference, and the low michael@0: limit makes testing eaiser. michael@0: michael@0: michael@0: As a final optimization we do a post-encoding step which drops all 0s at the michael@0: end of the encoded buffer. michael@0: michael@0: "foo" // 3 s s s michael@0: 1 // 1 bf f0 michael@0: ["a", "b"] // 7 s 3 s michael@0: [1, 2] // 5 bf f0 0 0 0 0 0 0 1 c0 michael@0: [[]] // 8 michael@0: */ michael@0: michael@0: const int MaxArrayCollapse = 3; michael@0: michael@0: const int MaxRecursionDepth = 256; michael@0: michael@0: nsresult michael@0: Key::EncodeJSValInternal(JSContext* aCx, JS::Handle aVal, michael@0: uint8_t aTypeOffset, uint16_t aRecursionDepth) michael@0: { michael@0: NS_ENSURE_TRUE(aRecursionDepth < MaxRecursionDepth, NS_ERROR_DOM_INDEXEDDB_DATA_ERR); michael@0: michael@0: static_assert(eMaxType * MaxArrayCollapse < 256, michael@0: "Unable to encode jsvals."); michael@0: michael@0: if (aVal.isString()) { michael@0: nsDependentJSString str; michael@0: if (!str.init(aCx, aVal)) { michael@0: IDB_REPORT_INTERNAL_ERR(); michael@0: return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR; michael@0: } michael@0: EncodeString(str, aTypeOffset); michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (aVal.isNumber()) { michael@0: double d = aVal.toNumber(); michael@0: if (mozilla::IsNaN(d)) { michael@0: return NS_ERROR_DOM_INDEXEDDB_DATA_ERR; michael@0: } michael@0: EncodeNumber(d, eFloat + aTypeOffset); michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (aVal.isObject()) { michael@0: JS::Rooted obj(aCx, &aVal.toObject()); michael@0: if (JS_IsArrayObject(aCx, obj)) { michael@0: aTypeOffset += eMaxType; michael@0: michael@0: if (aTypeOffset == eMaxType * MaxArrayCollapse) { michael@0: mBuffer.Append(aTypeOffset); michael@0: aTypeOffset = 0; michael@0: } michael@0: NS_ASSERTION((aTypeOffset % eMaxType) == 0 && michael@0: aTypeOffset < (eMaxType * MaxArrayCollapse), michael@0: "Wrong typeoffset"); michael@0: michael@0: uint32_t length; michael@0: if (!JS_GetArrayLength(aCx, obj, &length)) { michael@0: IDB_REPORT_INTERNAL_ERR(); michael@0: return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR; michael@0: } michael@0: michael@0: for (uint32_t index = 0; index < length; index++) { michael@0: JS::Rooted val(aCx); michael@0: if (!JS_GetElement(aCx, obj, index, &val)) { michael@0: IDB_REPORT_INTERNAL_ERR(); michael@0: return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR; michael@0: } michael@0: michael@0: nsresult rv = EncodeJSValInternal(aCx, val, aTypeOffset, michael@0: aRecursionDepth + 1); michael@0: if (NS_FAILED(rv)) { michael@0: return rv; michael@0: } michael@0: michael@0: aTypeOffset = 0; michael@0: } michael@0: michael@0: mBuffer.Append(eTerminator + aTypeOffset); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (JS_ObjectIsDate(aCx, obj)) { michael@0: if (!js_DateIsValid(obj)) { michael@0: return NS_ERROR_DOM_INDEXEDDB_DATA_ERR; michael@0: } michael@0: EncodeNumber(js_DateGetMsecSinceEpoch(obj), eDate + aTypeOffset); michael@0: return NS_OK; michael@0: } michael@0: } michael@0: michael@0: return NS_ERROR_DOM_INDEXEDDB_DATA_ERR; michael@0: } michael@0: michael@0: // static michael@0: nsresult michael@0: Key::DecodeJSValInternal(const unsigned char*& aPos, const unsigned char* aEnd, michael@0: JSContext* aCx, uint8_t aTypeOffset, JS::MutableHandle aVal, michael@0: uint16_t aRecursionDepth) michael@0: { michael@0: NS_ENSURE_TRUE(aRecursionDepth < MaxRecursionDepth, NS_ERROR_DOM_INDEXEDDB_DATA_ERR); michael@0: michael@0: if (*aPos - aTypeOffset >= eArray) { michael@0: JS::Rooted array(aCx, JS_NewArrayObject(aCx, 0)); michael@0: if (!array) { michael@0: NS_WARNING("Failed to make array!"); michael@0: IDB_REPORT_INTERNAL_ERR(); michael@0: return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR; michael@0: } michael@0: michael@0: aTypeOffset += eMaxType; michael@0: michael@0: if (aTypeOffset == eMaxType * MaxArrayCollapse) { michael@0: ++aPos; michael@0: aTypeOffset = 0; michael@0: } michael@0: michael@0: uint32_t index = 0; michael@0: JS::Rooted val(aCx); michael@0: while (aPos < aEnd && *aPos - aTypeOffset != eTerminator) { michael@0: nsresult rv = DecodeJSValInternal(aPos, aEnd, aCx, aTypeOffset, michael@0: &val, aRecursionDepth + 1); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: aTypeOffset = 0; michael@0: michael@0: if (!JS_SetElement(aCx, array, index++, val)) { michael@0: NS_WARNING("Failed to set array element!"); michael@0: IDB_REPORT_INTERNAL_ERR(); michael@0: return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR; michael@0: } michael@0: } michael@0: michael@0: NS_ASSERTION(aPos >= aEnd || (*aPos % eMaxType) == eTerminator, michael@0: "Should have found end-of-array marker"); michael@0: ++aPos; michael@0: michael@0: aVal.setObject(*array); michael@0: } michael@0: else if (*aPos - aTypeOffset == eString) { michael@0: nsString key; michael@0: DecodeString(aPos, aEnd, key); michael@0: if (!xpc::StringToJsval(aCx, key, aVal)) { michael@0: IDB_REPORT_INTERNAL_ERR(); michael@0: return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR; michael@0: } michael@0: } michael@0: else if (*aPos - aTypeOffset == eDate) { michael@0: double msec = static_cast(DecodeNumber(aPos, aEnd)); michael@0: JSObject* date = JS_NewDateObjectMsec(aCx, msec); michael@0: if (!date) { michael@0: IDB_WARNING("Failed to make date!"); michael@0: return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR; michael@0: } michael@0: michael@0: aVal.setObject(*date); michael@0: } michael@0: else if (*aPos - aTypeOffset == eFloat) { michael@0: aVal.setDouble(DecodeNumber(aPos, aEnd)); michael@0: } michael@0: else { michael@0: NS_NOTREACHED("Unknown key type!"); michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: #define ONE_BYTE_LIMIT 0x7E michael@0: #define TWO_BYTE_LIMIT (0x3FFF+0x7F) michael@0: michael@0: #define ONE_BYTE_ADJUST 1 michael@0: #define TWO_BYTE_ADJUST (-0x7F) michael@0: #define THREE_BYTE_SHIFT 6 michael@0: michael@0: void michael@0: Key::EncodeString(const nsAString& aString, uint8_t aTypeOffset) michael@0: { michael@0: // First measure how long the encoded string will be. michael@0: michael@0: // The +2 is for initial 3 and trailing 0. We'll compensate for multi-byte michael@0: // chars below. michael@0: uint32_t size = aString.Length() + 2; michael@0: michael@0: const char16_t* start = aString.BeginReading(); michael@0: const char16_t* end = aString.EndReading(); michael@0: for (const char16_t* iter = start; iter < end; ++iter) { michael@0: if (*iter > ONE_BYTE_LIMIT) { michael@0: size += *iter > TWO_BYTE_LIMIT ? 2 : 1; michael@0: } michael@0: } michael@0: michael@0: // Allocate memory for the new size michael@0: uint32_t oldLen = mBuffer.Length(); michael@0: char* buffer; michael@0: if (!mBuffer.GetMutableData(&buffer, oldLen + size)) { michael@0: return; michael@0: } michael@0: buffer += oldLen; michael@0: michael@0: // Write type marker michael@0: *(buffer++) = eString + aTypeOffset; michael@0: michael@0: // Encode string michael@0: for (const char16_t* iter = start; iter < end; ++iter) { michael@0: if (*iter <= ONE_BYTE_LIMIT) { michael@0: *(buffer++) = *iter + ONE_BYTE_ADJUST; michael@0: } michael@0: else if (*iter <= TWO_BYTE_LIMIT) { michael@0: char16_t c = char16_t(*iter) + TWO_BYTE_ADJUST + 0x8000; michael@0: *(buffer++) = (char)(c >> 8); michael@0: *(buffer++) = (char)(c & 0xFF); michael@0: } michael@0: else { michael@0: uint32_t c = (uint32_t(*iter) << THREE_BYTE_SHIFT) | 0x00C00000; michael@0: *(buffer++) = (char)(c >> 16); michael@0: *(buffer++) = (char)(c >> 8); michael@0: *(buffer++) = (char)c; michael@0: } michael@0: } michael@0: michael@0: // Write end marker michael@0: *(buffer++) = eTerminator; michael@0: michael@0: NS_ASSERTION(buffer == mBuffer.EndReading(), "Wrote wrong number of bytes"); michael@0: } michael@0: michael@0: // static michael@0: void michael@0: Key::DecodeString(const unsigned char*& aPos, const unsigned char* aEnd, michael@0: nsString& aString) michael@0: { michael@0: NS_ASSERTION(*aPos % eMaxType == eString, "Don't call me!"); michael@0: michael@0: const unsigned char* buffer = aPos + 1; michael@0: michael@0: // First measure how big the decoded string will be. michael@0: uint32_t size = 0; michael@0: const unsigned char* iter; michael@0: for (iter = buffer; iter < aEnd && *iter != eTerminator; ++iter) { michael@0: if (*iter & 0x80) { michael@0: iter += (*iter & 0x40) ? 2 : 1; michael@0: } michael@0: ++size; michael@0: } michael@0: michael@0: // Set end so that we don't have to check for null termination in the loop michael@0: // below michael@0: if (iter < aEnd) { michael@0: aEnd = iter; michael@0: } michael@0: michael@0: char16_t* out; michael@0: if (size && !aString.GetMutableData(&out, size)) { michael@0: return; michael@0: } michael@0: michael@0: for (iter = buffer; iter < aEnd;) { michael@0: if (!(*iter & 0x80)) { michael@0: *out = *(iter++) - ONE_BYTE_ADJUST; michael@0: } michael@0: else if (!(*iter & 0x40)) { michael@0: char16_t c = (char16_t(*(iter++)) << 8); michael@0: if (iter < aEnd) { michael@0: c |= *(iter++); michael@0: } michael@0: *out = c - TWO_BYTE_ADJUST - 0x8000; michael@0: } michael@0: else { michael@0: uint32_t c = uint32_t(*(iter++)) << (16 - THREE_BYTE_SHIFT); michael@0: if (iter < aEnd) { michael@0: c |= uint32_t(*(iter++)) << (8 - THREE_BYTE_SHIFT); michael@0: } michael@0: if (iter < aEnd) { michael@0: c |= *(iter++) >> THREE_BYTE_SHIFT; michael@0: } michael@0: *out = (char16_t)c; michael@0: } michael@0: michael@0: ++out; michael@0: } michael@0: michael@0: NS_ASSERTION(!size || out == aString.EndReading(), michael@0: "Should have written the whole string"); michael@0: michael@0: aPos = iter + 1; michael@0: } michael@0: michael@0: union Float64Union { michael@0: double d; michael@0: uint64_t u; michael@0: }; michael@0: michael@0: void michael@0: Key::EncodeNumber(double aFloat, uint8_t aType) michael@0: { michael@0: // Allocate memory for the new size michael@0: uint32_t oldLen = mBuffer.Length(); michael@0: char* buffer; michael@0: if (!mBuffer.GetMutableData(&buffer, oldLen + 1 + sizeof(double))) { michael@0: return; michael@0: } michael@0: buffer += oldLen; michael@0: michael@0: *(buffer++) = aType; michael@0: michael@0: Float64Union pun; michael@0: pun.d = aFloat; michael@0: // Note: The subtraction from 0 below is necessary to fix michael@0: // MSVC build warning C4146 (negating an unsigned value). michael@0: uint64_t number = pun.u & PR_UINT64(0x8000000000000000) ? michael@0: (0 - pun.u) : michael@0: (pun.u | PR_UINT64(0x8000000000000000)); michael@0: michael@0: mozilla::BigEndian::writeUint64(buffer, number); michael@0: } michael@0: michael@0: // static michael@0: double michael@0: Key::DecodeNumber(const unsigned char*& aPos, const unsigned char* aEnd) michael@0: { michael@0: NS_ASSERTION(*aPos % eMaxType == eFloat || michael@0: *aPos % eMaxType == eDate, "Don't call me!"); michael@0: michael@0: ++aPos; michael@0: michael@0: uint64_t number = 0; michael@0: memcpy(&number, aPos, std::min(sizeof(number), aEnd - aPos)); michael@0: number = mozilla::NativeEndian::swapFromBigEndian(number); michael@0: michael@0: aPos += sizeof(number); michael@0: michael@0: Float64Union pun; michael@0: // Note: The subtraction from 0 below is necessary to fix michael@0: // MSVC build warning C4146 (negating an unsigned value). michael@0: pun.u = number & PR_UINT64(0x8000000000000000) ? michael@0: (number & ~PR_UINT64(0x8000000000000000)) : michael@0: (0 - number); michael@0: michael@0: return pun.d; michael@0: }