dom/indexedDB/Key.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim: set ts=2 et sw=2 tw=80: */
michael@0 3 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 #include "mozilla/FloatingPoint.h"
michael@0 8
michael@0 9 #include "Key.h"
michael@0 10 #include "ReportInternalError.h"
michael@0 11
michael@0 12 #include "jsfriendapi.h"
michael@0 13 #include "nsAlgorithm.h"
michael@0 14 #include "nsJSUtils.h"
michael@0 15 #include "xpcpublic.h"
michael@0 16 #include "mozilla/Endian.h"
michael@0 17 #include <algorithm>
michael@0 18
michael@0 19 USING_INDEXEDDB_NAMESPACE
michael@0 20
michael@0 21 /*
michael@0 22 Here's how we encode keys:
michael@0 23
michael@0 24 Basic strategy is the following
michael@0 25
michael@0 26 Numbers: 1 n n n n n n n n ("n"s are encoded 64bit float)
michael@0 27 Dates: 2 n n n n n n n n ("n"s are encoded 64bit float)
michael@0 28 Strings: 3 s s s ... 0 ("s"s are encoded unicode bytes)
michael@0 29 Arrays: 4 i i i ... 0 ("i"s are encoded array items)
michael@0 30
michael@0 31
michael@0 32 When encoding floats, 64bit IEEE 754 are almost sortable, except that
michael@0 33 positive sort lower than negative, and negative sort descending. So we use
michael@0 34 the following encoding:
michael@0 35
michael@0 36 value < 0 ?
michael@0 37 (-to64bitInt(value)) :
michael@0 38 (to64bitInt(value) | 0x8000000000000000)
michael@0 39
michael@0 40
michael@0 41 When encoding strings, we use variable-size encoding per the following table
michael@0 42
michael@0 43 Chars 0 - 7E are encoded as 0xxxxxxx with 1 added
michael@0 44 Chars 7F - (3FFF+7F) are encoded as 10xxxxxx xxxxxxxx with 7F subtracted
michael@0 45 Chars (3FFF+80) - FFFF are encoded as 11xxxxxx xxxxxxxx xx000000
michael@0 46
michael@0 47 This ensures that the first byte is never encoded as 0, which means that the
michael@0 48 string terminator (per basic-stategy table) sorts before any character.
michael@0 49 The reason that (3FFF+80) - FFFF is encoded "shifted up" 6 bits is to maximize
michael@0 50 the chance that the last character is 0. See below for why.
michael@0 51
michael@0 52
michael@0 53 When encoding Arrays, we use an additional trick. Rather than adding a byte
michael@0 54 containing the value '4' to indicate type, we instead add 4 to the next byte.
michael@0 55 This is usually the byte containing the type of the first item in the array.
michael@0 56 So simple examples are
michael@0 57
michael@0 58 ["foo"] 7 s s s 0 0 // 7 is 3 + 4
michael@0 59 [1, 2] 5 n n n n n n n n 1 n n n n n n n n 0 // 5 is 1 + 4
michael@0 60
michael@0 61 Whe do this iteratively if the first item in the array is also an array
michael@0 62
michael@0 63 [["foo"]] 11 s s s 0 0 0
michael@0 64
michael@0 65 However, to avoid overflow in the byte, we only do this 3 times. If the first
michael@0 66 item in an array is an array, and that array also has an array as first item,
michael@0 67 we simply write out the total value accumulated so far and then follow the
michael@0 68 "normal" rules.
michael@0 69
michael@0 70 [[["foo"]]] 12 3 s s s 0 0 0 0
michael@0 71
michael@0 72 There is another edge case that can happen though, which is that the array
michael@0 73 doesn't have a first item to which we can add 4 to the type. Instead the
michael@0 74 next byte would normally be the array terminator (per basic-strategy table)
michael@0 75 so we simply add the 4 there.
michael@0 76
michael@0 77 [[]] 8 0 // 8 is 4 + 4 + 0
michael@0 78 [] 4 // 4 is 4 + 0
michael@0 79 [[], "foo"] 8 3 s s s 0 0 // 8 is 4 + 4 + 0
michael@0 80
michael@0 81 Note that the max-3-times rule kicks in before we get a chance to add to the
michael@0 82 array terminator
michael@0 83
michael@0 84 [[[]]] 12 0 0 0 // 12 is 4 + 4 + 4
michael@0 85
michael@0 86 We could use a much higher number than 3 at no complexity or performance cost,
michael@0 87 however it seems unlikely that it'll make a practical difference, and the low
michael@0 88 limit makes testing eaiser.
michael@0 89
michael@0 90
michael@0 91 As a final optimization we do a post-encoding step which drops all 0s at the
michael@0 92 end of the encoded buffer.
michael@0 93
michael@0 94 "foo" // 3 s s s
michael@0 95 1 // 1 bf f0
michael@0 96 ["a", "b"] // 7 s 3 s
michael@0 97 [1, 2] // 5 bf f0 0 0 0 0 0 0 1 c0
michael@0 98 [[]] // 8
michael@0 99 */
michael@0 100
michael@0 101 const int MaxArrayCollapse = 3;
michael@0 102
michael@0 103 const int MaxRecursionDepth = 256;
michael@0 104
michael@0 105 nsresult
michael@0 106 Key::EncodeJSValInternal(JSContext* aCx, JS::Handle<JS::Value> aVal,
michael@0 107 uint8_t aTypeOffset, uint16_t aRecursionDepth)
michael@0 108 {
michael@0 109 NS_ENSURE_TRUE(aRecursionDepth < MaxRecursionDepth, NS_ERROR_DOM_INDEXEDDB_DATA_ERR);
michael@0 110
michael@0 111 static_assert(eMaxType * MaxArrayCollapse < 256,
michael@0 112 "Unable to encode jsvals.");
michael@0 113
michael@0 114 if (aVal.isString()) {
michael@0 115 nsDependentJSString str;
michael@0 116 if (!str.init(aCx, aVal)) {
michael@0 117 IDB_REPORT_INTERNAL_ERR();
michael@0 118 return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR;
michael@0 119 }
michael@0 120 EncodeString(str, aTypeOffset);
michael@0 121 return NS_OK;
michael@0 122 }
michael@0 123
michael@0 124 if (aVal.isNumber()) {
michael@0 125 double d = aVal.toNumber();
michael@0 126 if (mozilla::IsNaN(d)) {
michael@0 127 return NS_ERROR_DOM_INDEXEDDB_DATA_ERR;
michael@0 128 }
michael@0 129 EncodeNumber(d, eFloat + aTypeOffset);
michael@0 130 return NS_OK;
michael@0 131 }
michael@0 132
michael@0 133 if (aVal.isObject()) {
michael@0 134 JS::Rooted<JSObject*> obj(aCx, &aVal.toObject());
michael@0 135 if (JS_IsArrayObject(aCx, obj)) {
michael@0 136 aTypeOffset += eMaxType;
michael@0 137
michael@0 138 if (aTypeOffset == eMaxType * MaxArrayCollapse) {
michael@0 139 mBuffer.Append(aTypeOffset);
michael@0 140 aTypeOffset = 0;
michael@0 141 }
michael@0 142 NS_ASSERTION((aTypeOffset % eMaxType) == 0 &&
michael@0 143 aTypeOffset < (eMaxType * MaxArrayCollapse),
michael@0 144 "Wrong typeoffset");
michael@0 145
michael@0 146 uint32_t length;
michael@0 147 if (!JS_GetArrayLength(aCx, obj, &length)) {
michael@0 148 IDB_REPORT_INTERNAL_ERR();
michael@0 149 return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR;
michael@0 150 }
michael@0 151
michael@0 152 for (uint32_t index = 0; index < length; index++) {
michael@0 153 JS::Rooted<JS::Value> val(aCx);
michael@0 154 if (!JS_GetElement(aCx, obj, index, &val)) {
michael@0 155 IDB_REPORT_INTERNAL_ERR();
michael@0 156 return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR;
michael@0 157 }
michael@0 158
michael@0 159 nsresult rv = EncodeJSValInternal(aCx, val, aTypeOffset,
michael@0 160 aRecursionDepth + 1);
michael@0 161 if (NS_FAILED(rv)) {
michael@0 162 return rv;
michael@0 163 }
michael@0 164
michael@0 165 aTypeOffset = 0;
michael@0 166 }
michael@0 167
michael@0 168 mBuffer.Append(eTerminator + aTypeOffset);
michael@0 169
michael@0 170 return NS_OK;
michael@0 171 }
michael@0 172
michael@0 173 if (JS_ObjectIsDate(aCx, obj)) {
michael@0 174 if (!js_DateIsValid(obj)) {
michael@0 175 return NS_ERROR_DOM_INDEXEDDB_DATA_ERR;
michael@0 176 }
michael@0 177 EncodeNumber(js_DateGetMsecSinceEpoch(obj), eDate + aTypeOffset);
michael@0 178 return NS_OK;
michael@0 179 }
michael@0 180 }
michael@0 181
michael@0 182 return NS_ERROR_DOM_INDEXEDDB_DATA_ERR;
michael@0 183 }
michael@0 184
michael@0 185 // static
michael@0 186 nsresult
michael@0 187 Key::DecodeJSValInternal(const unsigned char*& aPos, const unsigned char* aEnd,
michael@0 188 JSContext* aCx, uint8_t aTypeOffset, JS::MutableHandle<JS::Value> aVal,
michael@0 189 uint16_t aRecursionDepth)
michael@0 190 {
michael@0 191 NS_ENSURE_TRUE(aRecursionDepth < MaxRecursionDepth, NS_ERROR_DOM_INDEXEDDB_DATA_ERR);
michael@0 192
michael@0 193 if (*aPos - aTypeOffset >= eArray) {
michael@0 194 JS::Rooted<JSObject*> array(aCx, JS_NewArrayObject(aCx, 0));
michael@0 195 if (!array) {
michael@0 196 NS_WARNING("Failed to make array!");
michael@0 197 IDB_REPORT_INTERNAL_ERR();
michael@0 198 return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR;
michael@0 199 }
michael@0 200
michael@0 201 aTypeOffset += eMaxType;
michael@0 202
michael@0 203 if (aTypeOffset == eMaxType * MaxArrayCollapse) {
michael@0 204 ++aPos;
michael@0 205 aTypeOffset = 0;
michael@0 206 }
michael@0 207
michael@0 208 uint32_t index = 0;
michael@0 209 JS::Rooted<JS::Value> val(aCx);
michael@0 210 while (aPos < aEnd && *aPos - aTypeOffset != eTerminator) {
michael@0 211 nsresult rv = DecodeJSValInternal(aPos, aEnd, aCx, aTypeOffset,
michael@0 212 &val, aRecursionDepth + 1);
michael@0 213 NS_ENSURE_SUCCESS(rv, rv);
michael@0 214
michael@0 215 aTypeOffset = 0;
michael@0 216
michael@0 217 if (!JS_SetElement(aCx, array, index++, val)) {
michael@0 218 NS_WARNING("Failed to set array element!");
michael@0 219 IDB_REPORT_INTERNAL_ERR();
michael@0 220 return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR;
michael@0 221 }
michael@0 222 }
michael@0 223
michael@0 224 NS_ASSERTION(aPos >= aEnd || (*aPos % eMaxType) == eTerminator,
michael@0 225 "Should have found end-of-array marker");
michael@0 226 ++aPos;
michael@0 227
michael@0 228 aVal.setObject(*array);
michael@0 229 }
michael@0 230 else if (*aPos - aTypeOffset == eString) {
michael@0 231 nsString key;
michael@0 232 DecodeString(aPos, aEnd, key);
michael@0 233 if (!xpc::StringToJsval(aCx, key, aVal)) {
michael@0 234 IDB_REPORT_INTERNAL_ERR();
michael@0 235 return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR;
michael@0 236 }
michael@0 237 }
michael@0 238 else if (*aPos - aTypeOffset == eDate) {
michael@0 239 double msec = static_cast<double>(DecodeNumber(aPos, aEnd));
michael@0 240 JSObject* date = JS_NewDateObjectMsec(aCx, msec);
michael@0 241 if (!date) {
michael@0 242 IDB_WARNING("Failed to make date!");
michael@0 243 return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR;
michael@0 244 }
michael@0 245
michael@0 246 aVal.setObject(*date);
michael@0 247 }
michael@0 248 else if (*aPos - aTypeOffset == eFloat) {
michael@0 249 aVal.setDouble(DecodeNumber(aPos, aEnd));
michael@0 250 }
michael@0 251 else {
michael@0 252 NS_NOTREACHED("Unknown key type!");
michael@0 253 }
michael@0 254
michael@0 255 return NS_OK;
michael@0 256 }
michael@0 257
michael@0 258 #define ONE_BYTE_LIMIT 0x7E
michael@0 259 #define TWO_BYTE_LIMIT (0x3FFF+0x7F)
michael@0 260
michael@0 261 #define ONE_BYTE_ADJUST 1
michael@0 262 #define TWO_BYTE_ADJUST (-0x7F)
michael@0 263 #define THREE_BYTE_SHIFT 6
michael@0 264
michael@0 265 void
michael@0 266 Key::EncodeString(const nsAString& aString, uint8_t aTypeOffset)
michael@0 267 {
michael@0 268 // First measure how long the encoded string will be.
michael@0 269
michael@0 270 // The +2 is for initial 3 and trailing 0. We'll compensate for multi-byte
michael@0 271 // chars below.
michael@0 272 uint32_t size = aString.Length() + 2;
michael@0 273
michael@0 274 const char16_t* start = aString.BeginReading();
michael@0 275 const char16_t* end = aString.EndReading();
michael@0 276 for (const char16_t* iter = start; iter < end; ++iter) {
michael@0 277 if (*iter > ONE_BYTE_LIMIT) {
michael@0 278 size += *iter > TWO_BYTE_LIMIT ? 2 : 1;
michael@0 279 }
michael@0 280 }
michael@0 281
michael@0 282 // Allocate memory for the new size
michael@0 283 uint32_t oldLen = mBuffer.Length();
michael@0 284 char* buffer;
michael@0 285 if (!mBuffer.GetMutableData(&buffer, oldLen + size)) {
michael@0 286 return;
michael@0 287 }
michael@0 288 buffer += oldLen;
michael@0 289
michael@0 290 // Write type marker
michael@0 291 *(buffer++) = eString + aTypeOffset;
michael@0 292
michael@0 293 // Encode string
michael@0 294 for (const char16_t* iter = start; iter < end; ++iter) {
michael@0 295 if (*iter <= ONE_BYTE_LIMIT) {
michael@0 296 *(buffer++) = *iter + ONE_BYTE_ADJUST;
michael@0 297 }
michael@0 298 else if (*iter <= TWO_BYTE_LIMIT) {
michael@0 299 char16_t c = char16_t(*iter) + TWO_BYTE_ADJUST + 0x8000;
michael@0 300 *(buffer++) = (char)(c >> 8);
michael@0 301 *(buffer++) = (char)(c & 0xFF);
michael@0 302 }
michael@0 303 else {
michael@0 304 uint32_t c = (uint32_t(*iter) << THREE_BYTE_SHIFT) | 0x00C00000;
michael@0 305 *(buffer++) = (char)(c >> 16);
michael@0 306 *(buffer++) = (char)(c >> 8);
michael@0 307 *(buffer++) = (char)c;
michael@0 308 }
michael@0 309 }
michael@0 310
michael@0 311 // Write end marker
michael@0 312 *(buffer++) = eTerminator;
michael@0 313
michael@0 314 NS_ASSERTION(buffer == mBuffer.EndReading(), "Wrote wrong number of bytes");
michael@0 315 }
michael@0 316
michael@0 317 // static
michael@0 318 void
michael@0 319 Key::DecodeString(const unsigned char*& aPos, const unsigned char* aEnd,
michael@0 320 nsString& aString)
michael@0 321 {
michael@0 322 NS_ASSERTION(*aPos % eMaxType == eString, "Don't call me!");
michael@0 323
michael@0 324 const unsigned char* buffer = aPos + 1;
michael@0 325
michael@0 326 // First measure how big the decoded string will be.
michael@0 327 uint32_t size = 0;
michael@0 328 const unsigned char* iter;
michael@0 329 for (iter = buffer; iter < aEnd && *iter != eTerminator; ++iter) {
michael@0 330 if (*iter & 0x80) {
michael@0 331 iter += (*iter & 0x40) ? 2 : 1;
michael@0 332 }
michael@0 333 ++size;
michael@0 334 }
michael@0 335
michael@0 336 // Set end so that we don't have to check for null termination in the loop
michael@0 337 // below
michael@0 338 if (iter < aEnd) {
michael@0 339 aEnd = iter;
michael@0 340 }
michael@0 341
michael@0 342 char16_t* out;
michael@0 343 if (size && !aString.GetMutableData(&out, size)) {
michael@0 344 return;
michael@0 345 }
michael@0 346
michael@0 347 for (iter = buffer; iter < aEnd;) {
michael@0 348 if (!(*iter & 0x80)) {
michael@0 349 *out = *(iter++) - ONE_BYTE_ADJUST;
michael@0 350 }
michael@0 351 else if (!(*iter & 0x40)) {
michael@0 352 char16_t c = (char16_t(*(iter++)) << 8);
michael@0 353 if (iter < aEnd) {
michael@0 354 c |= *(iter++);
michael@0 355 }
michael@0 356 *out = c - TWO_BYTE_ADJUST - 0x8000;
michael@0 357 }
michael@0 358 else {
michael@0 359 uint32_t c = uint32_t(*(iter++)) << (16 - THREE_BYTE_SHIFT);
michael@0 360 if (iter < aEnd) {
michael@0 361 c |= uint32_t(*(iter++)) << (8 - THREE_BYTE_SHIFT);
michael@0 362 }
michael@0 363 if (iter < aEnd) {
michael@0 364 c |= *(iter++) >> THREE_BYTE_SHIFT;
michael@0 365 }
michael@0 366 *out = (char16_t)c;
michael@0 367 }
michael@0 368
michael@0 369 ++out;
michael@0 370 }
michael@0 371
michael@0 372 NS_ASSERTION(!size || out == aString.EndReading(),
michael@0 373 "Should have written the whole string");
michael@0 374
michael@0 375 aPos = iter + 1;
michael@0 376 }
michael@0 377
michael@0 378 union Float64Union {
michael@0 379 double d;
michael@0 380 uint64_t u;
michael@0 381 };
michael@0 382
michael@0 383 void
michael@0 384 Key::EncodeNumber(double aFloat, uint8_t aType)
michael@0 385 {
michael@0 386 // Allocate memory for the new size
michael@0 387 uint32_t oldLen = mBuffer.Length();
michael@0 388 char* buffer;
michael@0 389 if (!mBuffer.GetMutableData(&buffer, oldLen + 1 + sizeof(double))) {
michael@0 390 return;
michael@0 391 }
michael@0 392 buffer += oldLen;
michael@0 393
michael@0 394 *(buffer++) = aType;
michael@0 395
michael@0 396 Float64Union pun;
michael@0 397 pun.d = aFloat;
michael@0 398 // Note: The subtraction from 0 below is necessary to fix
michael@0 399 // MSVC build warning C4146 (negating an unsigned value).
michael@0 400 uint64_t number = pun.u & PR_UINT64(0x8000000000000000) ?
michael@0 401 (0 - pun.u) :
michael@0 402 (pun.u | PR_UINT64(0x8000000000000000));
michael@0 403
michael@0 404 mozilla::BigEndian::writeUint64(buffer, number);
michael@0 405 }
michael@0 406
michael@0 407 // static
michael@0 408 double
michael@0 409 Key::DecodeNumber(const unsigned char*& aPos, const unsigned char* aEnd)
michael@0 410 {
michael@0 411 NS_ASSERTION(*aPos % eMaxType == eFloat ||
michael@0 412 *aPos % eMaxType == eDate, "Don't call me!");
michael@0 413
michael@0 414 ++aPos;
michael@0 415
michael@0 416 uint64_t number = 0;
michael@0 417 memcpy(&number, aPos, std::min<size_t>(sizeof(number), aEnd - aPos));
michael@0 418 number = mozilla::NativeEndian::swapFromBigEndian(number);
michael@0 419
michael@0 420 aPos += sizeof(number);
michael@0 421
michael@0 422 Float64Union pun;
michael@0 423 // Note: The subtraction from 0 below is necessary to fix
michael@0 424 // MSVC build warning C4146 (negating an unsigned value).
michael@0 425 pun.u = number & PR_UINT64(0x8000000000000000) ?
michael@0 426 (number & ~PR_UINT64(0x8000000000000000)) :
michael@0 427 (0 - number);
michael@0 428
michael@0 429 return pun.d;
michael@0 430 }

mercurial