Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /*
8 * JS string type implementation.
9 *
10 * In order to avoid unnecessary js_LockGCThing/js_UnlockGCThing calls, these
11 * native methods store strings (possibly newborn) converted from their 'this'
12 * parameter and arguments on the stack: 'this' conversions at argv[-1], arg
13 * conversions at their index (argv[0], argv[1]). This is a legitimate method
14 * of rooting things that might lose their newborn root due to subsequent GC
15 * allocations in the same native method.
16 */
18 #include "jsstr.h"
20 #include "mozilla/Attributes.h"
21 #include "mozilla/Casting.h"
22 #include "mozilla/CheckedInt.h"
23 #include "mozilla/FloatingPoint.h"
24 #include "mozilla/PodOperations.h"
26 #include <ctype.h>
27 #include <string.h>
29 #include "jsapi.h"
30 #include "jsarray.h"
31 #include "jsatom.h"
32 #include "jsbool.h"
33 #include "jscntxt.h"
34 #include "jsgc.h"
35 #include "jsnum.h"
36 #include "jsobj.h"
37 #include "jsopcode.h"
38 #include "jstypes.h"
39 #include "jsutil.h"
41 #include "builtin/Intl.h"
42 #include "builtin/RegExp.h"
43 #if ENABLE_INTL_API
44 #include "unicode/unorm.h"
45 #endif
46 #include "vm/GlobalObject.h"
47 #include "vm/Interpreter.h"
48 #include "vm/NumericConversions.h"
49 #include "vm/Opcodes.h"
50 #include "vm/RegExpObject.h"
51 #include "vm/RegExpStatics.h"
52 #include "vm/ScopeObject.h"
53 #include "vm/StringBuffer.h"
55 #include "jsinferinlines.h"
57 #include "vm/Interpreter-inl.h"
58 #include "vm/String-inl.h"
59 #include "vm/StringObject-inl.h"
61 using namespace js;
62 using namespace js::gc;
63 using namespace js::types;
64 using namespace js::unicode;
66 using mozilla::CheckedInt;
67 using mozilla::IsNaN;
68 using mozilla::IsNegativeZero;
69 using mozilla::PodCopy;
70 using mozilla::PodEqual;
71 using mozilla::SafeCast;
73 typedef Handle<JSLinearString*> HandleLinearString;
75 static JSLinearString *
76 ArgToRootedString(JSContext *cx, CallArgs &args, unsigned argno)
77 {
78 if (argno >= args.length())
79 return cx->names().undefined;
81 JSString *str = ToString<CanGC>(cx, args[argno]);
82 if (!str)
83 return nullptr;
85 args[argno].setString(str);
86 return str->ensureLinear(cx);
87 }
89 /*
90 * Forward declarations for URI encode/decode and helper routines
91 */
92 static bool
93 str_decodeURI(JSContext *cx, unsigned argc, Value *vp);
95 static bool
96 str_decodeURI_Component(JSContext *cx, unsigned argc, Value *vp);
98 static bool
99 str_encodeURI(JSContext *cx, unsigned argc, Value *vp);
101 static bool
102 str_encodeURI_Component(JSContext *cx, unsigned argc, Value *vp);
104 /*
105 * Global string methods
106 */
109 /* ES5 B.2.1 */
110 static bool
111 str_escape(JSContext *cx, unsigned argc, Value *vp)
112 {
113 CallArgs args = CallArgsFromVp(argc, vp);
115 static const char digits[] = {'0', '1', '2', '3', '4', '5', '6', '7',
116 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
118 JSLinearString *str = ArgToRootedString(cx, args, 0);
119 if (!str)
120 return false;
122 size_t length = str->length();
123 const jschar *chars = str->chars();
125 static const uint8_t shouldPassThrough[256] = {
126 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
127 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
128 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
129 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
130 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
131 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
132 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
133 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
134 };
136 /* In step 7, exactly 69 characters should pass through unencoded. */
137 #ifdef DEBUG
138 size_t count = 0;
139 for (size_t i = 0; i < sizeof(shouldPassThrough); i++) {
140 if (shouldPassThrough[i]) {
141 count++;
142 }
143 }
144 JS_ASSERT(count == 69);
145 #endif
148 /* Take a first pass and see how big the result string will need to be. */
149 size_t newlength = length;
150 for (size_t i = 0; i < length; i++) {
151 jschar ch = chars[i];
152 if (ch < 128 && shouldPassThrough[ch])
153 continue;
155 /* The character will be encoded as %XX or %uXXXX. */
156 newlength += (ch < 256) ? 2 : 5;
158 /*
159 * This overflow test works because newlength is incremented by at
160 * most 5 on each iteration.
161 */
162 if (newlength < length) {
163 js_ReportAllocationOverflow(cx);
164 return false;
165 }
166 }
168 if (newlength >= ~(size_t)0 / sizeof(jschar)) {
169 js_ReportAllocationOverflow(cx);
170 return false;
171 }
173 jschar *newchars = cx->pod_malloc<jschar>(newlength + 1);
174 if (!newchars)
175 return false;
176 size_t i, ni;
177 for (i = 0, ni = 0; i < length; i++) {
178 jschar ch = chars[i];
179 if (ch < 128 && shouldPassThrough[ch]) {
180 newchars[ni++] = ch;
181 } else if (ch < 256) {
182 newchars[ni++] = '%';
183 newchars[ni++] = digits[ch >> 4];
184 newchars[ni++] = digits[ch & 0xF];
185 } else {
186 newchars[ni++] = '%';
187 newchars[ni++] = 'u';
188 newchars[ni++] = digits[ch >> 12];
189 newchars[ni++] = digits[(ch & 0xF00) >> 8];
190 newchars[ni++] = digits[(ch & 0xF0) >> 4];
191 newchars[ni++] = digits[ch & 0xF];
192 }
193 }
194 JS_ASSERT(ni == newlength);
195 newchars[newlength] = 0;
197 JSString *retstr = js_NewString<CanGC>(cx, newchars, newlength);
198 if (!retstr) {
199 js_free(newchars);
200 return false;
201 }
203 args.rval().setString(retstr);
204 return true;
205 }
207 static inline bool
208 Unhex4(const jschar *chars, jschar *result)
209 {
210 jschar a = chars[0],
211 b = chars[1],
212 c = chars[2],
213 d = chars[3];
215 if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
216 return false;
218 *result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d);
219 return true;
220 }
222 static inline bool
223 Unhex2(const jschar *chars, jschar *result)
224 {
225 jschar a = chars[0],
226 b = chars[1];
228 if (!(JS7_ISHEX(a) && JS7_ISHEX(b)))
229 return false;
231 *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
232 return true;
233 }
235 /* ES5 B.2.2 */
236 static bool
237 str_unescape(JSContext *cx, unsigned argc, Value *vp)
238 {
239 CallArgs args = CallArgsFromVp(argc, vp);
241 /* Step 1. */
242 JSLinearString *str = ArgToRootedString(cx, args, 0);
243 if (!str)
244 return false;
246 /*
247 * NB: use signed integers for length/index to allow simple length
248 * comparisons without unsigned-underflow hazards.
249 */
250 JS_STATIC_ASSERT(JSString::MAX_LENGTH <= INT_MAX);
252 /* Step 2. */
253 int length = str->length();
254 const jschar *chars = str->chars();
256 /* Step 3. */
257 StringBuffer sb(cx);
259 /*
260 * Note that the spec algorithm has been optimized to avoid building
261 * a string in the case where no escapes are present.
262 */
264 /* Step 4. */
265 int k = 0;
266 bool building = false;
268 while (true) {
269 /* Step 5. */
270 if (k == length) {
271 JSLinearString *result;
272 if (building) {
273 result = sb.finishString();
274 if (!result)
275 return false;
276 } else {
277 result = str;
278 }
280 args.rval().setString(result);
281 return true;
282 }
284 /* Step 6. */
285 jschar c = chars[k];
287 /* Step 7. */
288 if (c != '%')
289 goto step_18;
291 /* Step 8. */
292 if (k > length - 6)
293 goto step_14;
295 /* Step 9. */
296 if (chars[k + 1] != 'u')
297 goto step_14;
299 #define ENSURE_BUILDING \
300 JS_BEGIN_MACRO \
301 if (!building) { \
302 building = true; \
303 if (!sb.reserve(length)) \
304 return false; \
305 sb.infallibleAppend(chars, chars + k); \
306 } \
307 JS_END_MACRO
309 /* Step 10-13. */
310 if (Unhex4(&chars[k + 2], &c)) {
311 ENSURE_BUILDING;
312 k += 5;
313 goto step_18;
314 }
316 step_14:
317 /* Step 14. */
318 if (k > length - 3)
319 goto step_18;
321 /* Step 15-17. */
322 if (Unhex2(&chars[k + 1], &c)) {
323 ENSURE_BUILDING;
324 k += 2;
325 }
327 step_18:
328 if (building)
329 sb.infallibleAppend(c);
331 /* Step 19. */
332 k += 1;
333 }
334 #undef ENSURE_BUILDING
335 }
337 #if JS_HAS_UNEVAL
338 static bool
339 str_uneval(JSContext *cx, unsigned argc, Value *vp)
340 {
341 CallArgs args = CallArgsFromVp(argc, vp);
342 JSString *str = ValueToSource(cx, args.get(0));
343 if (!str)
344 return false;
346 args.rval().setString(str);
347 return true;
348 }
349 #endif
351 static const JSFunctionSpec string_functions[] = {
352 JS_FN(js_escape_str, str_escape, 1,0),
353 JS_FN(js_unescape_str, str_unescape, 1,0),
354 #if JS_HAS_UNEVAL
355 JS_FN(js_uneval_str, str_uneval, 1,0),
356 #endif
357 JS_FN(js_decodeURI_str, str_decodeURI, 1,0),
358 JS_FN(js_encodeURI_str, str_encodeURI, 1,0),
359 JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,0),
360 JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,0),
362 JS_FS_END
363 };
365 const jschar js_empty_ucstr[] = {0};
366 const JSSubString js_EmptySubString = {0, js_empty_ucstr};
368 static const unsigned STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
370 static bool
371 str_enumerate(JSContext *cx, HandleObject obj)
372 {
373 RootedString str(cx, obj->as<StringObject>().unbox());
374 RootedValue value(cx);
375 for (size_t i = 0, length = str->length(); i < length; i++) {
376 JSString *str1 = js_NewDependentString(cx, str, i, 1);
377 if (!str1)
378 return false;
379 value.setString(str1);
380 if (!JSObject::defineElement(cx, obj, i, value,
381 JS_PropertyStub, JS_StrictPropertyStub,
382 STRING_ELEMENT_ATTRS))
383 {
384 return false;
385 }
386 }
388 return true;
389 }
391 bool
392 js::str_resolve(JSContext *cx, HandleObject obj, HandleId id, MutableHandleObject objp)
393 {
394 if (!JSID_IS_INT(id))
395 return true;
397 RootedString str(cx, obj->as<StringObject>().unbox());
399 int32_t slot = JSID_TO_INT(id);
400 if ((size_t)slot < str->length()) {
401 JSString *str1 = cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
402 if (!str1)
403 return false;
404 RootedValue value(cx, StringValue(str1));
405 if (!JSObject::defineElement(cx, obj, uint32_t(slot), value, nullptr, nullptr,
406 STRING_ELEMENT_ATTRS))
407 {
408 return false;
409 }
410 objp.set(obj);
411 }
412 return true;
413 }
415 const Class StringObject::class_ = {
416 js_String_str,
417 JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
418 JSCLASS_NEW_RESOLVE | JSCLASS_HAS_CACHED_PROTO(JSProto_String),
419 JS_PropertyStub, /* addProperty */
420 JS_DeletePropertyStub, /* delProperty */
421 JS_PropertyStub, /* getProperty */
422 JS_StrictPropertyStub, /* setProperty */
423 str_enumerate,
424 (JSResolveOp)str_resolve,
425 JS_ConvertStub
426 };
428 /*
429 * Returns a JSString * for the |this| value associated with 'call', or throws
430 * a TypeError if |this| is null or undefined. This algorithm is the same as
431 * calling CheckObjectCoercible(this), then returning ToString(this), as all
432 * String.prototype.* methods do (other than toString and valueOf).
433 */
434 static MOZ_ALWAYS_INLINE JSString *
435 ThisToStringForStringProto(JSContext *cx, CallReceiver call)
436 {
437 JS_CHECK_RECURSION(cx, return nullptr);
439 if (call.thisv().isString())
440 return call.thisv().toString();
442 if (call.thisv().isObject()) {
443 RootedObject obj(cx, &call.thisv().toObject());
444 if (obj->is<StringObject>()) {
445 Rooted<jsid> id(cx, NameToId(cx->names().toString));
446 if (ClassMethodIsNative(cx, obj, &StringObject::class_, id, js_str_toString)) {
447 JSString *str = obj->as<StringObject>().unbox();
448 call.setThis(StringValue(str));
449 return str;
450 }
451 }
452 } else if (call.thisv().isNullOrUndefined()) {
453 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_CANT_CONVERT_TO,
454 call.thisv().isNull() ? "null" : "undefined", "object");
455 return nullptr;
456 }
458 JSString *str = ToStringSlow<CanGC>(cx, call.thisv());
459 if (!str)
460 return nullptr;
462 call.setThis(StringValue(str));
463 return str;
464 }
466 MOZ_ALWAYS_INLINE bool
467 IsString(HandleValue v)
468 {
469 return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
470 }
472 #if JS_HAS_TOSOURCE
474 /*
475 * String.prototype.quote is generic (as are most string methods), unlike
476 * toSource, toString, and valueOf.
477 */
478 static bool
479 str_quote(JSContext *cx, unsigned argc, Value *vp)
480 {
481 CallArgs args = CallArgsFromVp(argc, vp);
482 RootedString str(cx, ThisToStringForStringProto(cx, args));
483 if (!str)
484 return false;
485 str = js_QuoteString(cx, str, '"');
486 if (!str)
487 return false;
488 args.rval().setString(str);
489 return true;
490 }
492 MOZ_ALWAYS_INLINE bool
493 str_toSource_impl(JSContext *cx, CallArgs args)
494 {
495 JS_ASSERT(IsString(args.thisv()));
497 Rooted<JSString*> str(cx, ToString<CanGC>(cx, args.thisv()));
498 if (!str)
499 return false;
501 str = js_QuoteString(cx, str, '"');
502 if (!str)
503 return false;
505 StringBuffer sb(cx);
506 if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
507 return false;
509 str = sb.finishString();
510 if (!str)
511 return false;
512 args.rval().setString(str);
513 return true;
514 }
516 static bool
517 str_toSource(JSContext *cx, unsigned argc, Value *vp)
518 {
519 CallArgs args = CallArgsFromVp(argc, vp);
520 return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
521 }
523 #endif /* JS_HAS_TOSOURCE */
525 MOZ_ALWAYS_INLINE bool
526 str_toString_impl(JSContext *cx, CallArgs args)
527 {
528 JS_ASSERT(IsString(args.thisv()));
530 args.rval().setString(args.thisv().isString()
531 ? args.thisv().toString()
532 : args.thisv().toObject().as<StringObject>().unbox());
533 return true;
534 }
536 bool
537 js_str_toString(JSContext *cx, unsigned argc, Value *vp)
538 {
539 CallArgs args = CallArgsFromVp(argc, vp);
540 return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
541 }
543 /*
544 * Java-like string native methods.
545 */
547 static MOZ_ALWAYS_INLINE bool
548 ValueToIntegerRange(JSContext *cx, HandleValue v, int32_t *out)
549 {
550 if (v.isInt32()) {
551 *out = v.toInt32();
552 } else {
553 double d;
554 if (!ToInteger(cx, v, &d))
555 return false;
556 if (d > INT32_MAX)
557 *out = INT32_MAX;
558 else if (d < INT32_MIN)
559 *out = INT32_MIN;
560 else
561 *out = int32_t(d);
562 }
564 return true;
565 }
567 static JSString *
568 DoSubstr(JSContext *cx, JSString *str, size_t begin, size_t len)
569 {
570 /*
571 * Optimization for one level deep ropes.
572 * This is common for the following pattern:
573 *
574 * while() {
575 * text = text.substr(0, x) + "bla" + text.substr(x)
576 * test.charCodeAt(x + 1)
577 * }
578 */
579 if (str->isRope()) {
580 JSRope *rope = &str->asRope();
582 /* Substring is totally in leftChild of rope. */
583 if (begin + len <= rope->leftChild()->length()) {
584 str = rope->leftChild();
585 return js_NewDependentString(cx, str, begin, len);
586 }
588 /* Substring is totally in rightChild of rope. */
589 if (begin >= rope->leftChild()->length()) {
590 str = rope->rightChild();
591 begin -= rope->leftChild()->length();
592 return js_NewDependentString(cx, str, begin, len);
593 }
595 /*
596 * Requested substring is partly in the left and partly in right child.
597 * Create a rope of substrings for both childs.
598 */
599 JS_ASSERT (begin < rope->leftChild()->length() &&
600 begin + len > rope->leftChild()->length());
602 size_t lhsLength = rope->leftChild()->length() - begin;
603 size_t rhsLength = begin + len - rope->leftChild()->length();
605 Rooted<JSRope *> ropeRoot(cx, rope);
606 RootedString lhs(cx, js_NewDependentString(cx, ropeRoot->leftChild(),
607 begin, lhsLength));
608 if (!lhs)
609 return nullptr;
611 RootedString rhs(cx, js_NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
612 if (!rhs)
613 return nullptr;
615 return JSRope::new_<CanGC>(cx, lhs, rhs, len);
616 }
618 return js_NewDependentString(cx, str, begin, len);
619 }
621 static bool
622 str_substring(JSContext *cx, unsigned argc, Value *vp)
623 {
624 CallArgs args = CallArgsFromVp(argc, vp);
626 JSString *str = ThisToStringForStringProto(cx, args);
627 if (!str)
628 return false;
630 int32_t length, begin, end;
631 if (args.length() > 0) {
632 end = length = int32_t(str->length());
634 if (args[0].isInt32()) {
635 begin = args[0].toInt32();
636 } else {
637 RootedString strRoot(cx, str);
638 if (!ValueToIntegerRange(cx, args[0], &begin))
639 return false;
640 str = strRoot;
641 }
643 if (begin < 0)
644 begin = 0;
645 else if (begin > length)
646 begin = length;
648 if (args.hasDefined(1)) {
649 if (args[1].isInt32()) {
650 end = args[1].toInt32();
651 } else {
652 RootedString strRoot(cx, str);
653 if (!ValueToIntegerRange(cx, args[1], &end))
654 return false;
655 str = strRoot;
656 }
658 if (end > length) {
659 end = length;
660 } else {
661 if (end < 0)
662 end = 0;
663 if (end < begin) {
664 int32_t tmp = begin;
665 begin = end;
666 end = tmp;
667 }
668 }
669 }
671 str = DoSubstr(cx, str, size_t(begin), size_t(end - begin));
672 if (!str)
673 return false;
674 }
676 args.rval().setString(str);
677 return true;
678 }
680 JSString* JS_FASTCALL
681 js_toLowerCase(JSContext *cx, JSString *str)
682 {
683 size_t n = str->length();
684 const jschar *s = str->getChars(cx);
685 if (!s)
686 return nullptr;
688 jschar *news = cx->pod_malloc<jschar>(n + 1);
689 if (!news)
690 return nullptr;
691 for (size_t i = 0; i < n; i++)
692 news[i] = unicode::ToLowerCase(s[i]);
693 news[n] = 0;
694 str = js_NewString<CanGC>(cx, news, n);
695 if (!str) {
696 js_free(news);
697 return nullptr;
698 }
699 return str;
700 }
702 static inline bool
703 ToLowerCaseHelper(JSContext *cx, CallReceiver call)
704 {
705 RootedString str(cx, ThisToStringForStringProto(cx, call));
706 if (!str)
707 return false;
709 str = js_toLowerCase(cx, str);
710 if (!str)
711 return false;
713 call.rval().setString(str);
714 return true;
715 }
717 static bool
718 str_toLowerCase(JSContext *cx, unsigned argc, Value *vp)
719 {
720 return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
721 }
723 static bool
724 str_toLocaleLowerCase(JSContext *cx, unsigned argc, Value *vp)
725 {
726 CallArgs args = CallArgsFromVp(argc, vp);
728 /*
729 * Forcefully ignore the first (or any) argument and return toLowerCase(),
730 * ECMA has reserved that argument, presumably for defining the locale.
731 */
732 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) {
733 RootedString str(cx, ThisToStringForStringProto(cx, args));
734 if (!str)
735 return false;
737 RootedValue result(cx);
738 if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
739 return false;
741 args.rval().set(result);
742 return true;
743 }
745 return ToLowerCaseHelper(cx, args);
746 }
748 JSString* JS_FASTCALL
749 js_toUpperCase(JSContext *cx, JSString *str)
750 {
751 size_t n = str->length();
752 const jschar *s = str->getChars(cx);
753 if (!s)
754 return nullptr;
755 jschar *news = cx->pod_malloc<jschar>(n + 1);
756 if (!news)
757 return nullptr;
758 for (size_t i = 0; i < n; i++)
759 news[i] = unicode::ToUpperCase(s[i]);
760 news[n] = 0;
761 str = js_NewString<CanGC>(cx, news, n);
762 if (!str) {
763 js_free(news);
764 return nullptr;
765 }
766 return str;
767 }
769 static bool
770 ToUpperCaseHelper(JSContext *cx, CallReceiver call)
771 {
772 RootedString str(cx, ThisToStringForStringProto(cx, call));
773 if (!str)
774 return false;
776 str = js_toUpperCase(cx, str);
777 if (!str)
778 return false;
780 call.rval().setString(str);
781 return true;
782 }
784 static bool
785 str_toUpperCase(JSContext *cx, unsigned argc, Value *vp)
786 {
787 return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
788 }
790 static bool
791 str_toLocaleUpperCase(JSContext *cx, unsigned argc, Value *vp)
792 {
793 CallArgs args = CallArgsFromVp(argc, vp);
795 /*
796 * Forcefully ignore the first (or any) argument and return toUpperCase(),
797 * ECMA has reserved that argument, presumably for defining the locale.
798 */
799 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) {
800 RootedString str(cx, ThisToStringForStringProto(cx, args));
801 if (!str)
802 return false;
804 RootedValue result(cx);
805 if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
806 return false;
808 args.rval().set(result);
809 return true;
810 }
812 return ToUpperCaseHelper(cx, args);
813 }
815 #if !EXPOSE_INTL_API
816 static bool
817 str_localeCompare(JSContext *cx, unsigned argc, Value *vp)
818 {
819 CallArgs args = CallArgsFromVp(argc, vp);
820 RootedString str(cx, ThisToStringForStringProto(cx, args));
821 if (!str)
822 return false;
824 RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
825 if (!thatStr)
826 return false;
828 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeCompare) {
829 RootedValue result(cx);
830 if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr, &result))
831 return false;
833 args.rval().set(result);
834 return true;
835 }
837 int32_t result;
838 if (!CompareStrings(cx, str, thatStr, &result))
839 return false;
841 args.rval().setInt32(result);
842 return true;
843 }
844 #endif
846 #if EXPOSE_INTL_API
847 static const size_t SB_LENGTH = 32;
849 /* ES6 20140210 draft 21.1.3.12. */
850 static bool
851 str_normalize(JSContext *cx, unsigned argc, Value *vp)
852 {
853 CallArgs args = CallArgsFromVp(argc, vp);
855 // Steps 1-3.
856 RootedString str(cx, ThisToStringForStringProto(cx, args));
857 if (!str)
858 return false;
860 // Step 4.
861 UNormalizationMode form;
862 if (!args.hasDefined(0)) {
863 form = UNORM_NFC;
864 } else {
865 // Steps 5-6.
866 Rooted<JSLinearString*> formStr(cx, ArgToRootedString(cx, args, 0));
867 if (!formStr)
868 return false;
870 // Step 7.
871 if (formStr == cx->names().NFC) {
872 form = UNORM_NFC;
873 } else if (formStr == cx->names().NFD) {
874 form = UNORM_NFD;
875 } else if (formStr == cx->names().NFKC) {
876 form = UNORM_NFKC;
877 } else if (formStr == cx->names().NFKD) {
878 form = UNORM_NFKD;
879 } else {
880 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr,
881 JSMSG_INVALID_NORMALIZE_FORM);
882 return false;
883 }
884 }
886 // Step 8.
887 Rooted<JSFlatString*> flatStr(cx, str->ensureFlat(cx));
888 if (!flatStr)
889 return false;
890 const UChar *srcChars = JSCharToUChar(flatStr->chars());
891 int32_t srcLen = SafeCast<int32_t>(flatStr->length());
892 StringBuffer chars(cx);
893 if (!chars.resize(SB_LENGTH))
894 return false;
895 UErrorCode status = U_ZERO_ERROR;
896 int32_t size = unorm_normalize(srcChars, srcLen, form, 0,
897 JSCharToUChar(chars.begin()), SB_LENGTH,
898 &status);
899 if (status == U_BUFFER_OVERFLOW_ERROR) {
900 if (!chars.resize(size))
901 return false;
902 status = U_ZERO_ERROR;
903 #ifdef DEBUG
904 int32_t finalSize =
905 #endif
906 unorm_normalize(srcChars, srcLen, form, 0,
907 JSCharToUChar(chars.begin()), size,
908 &status);
909 MOZ_ASSERT(size == finalSize || U_FAILURE(status), "unorm_normalize behaved inconsistently");
910 }
911 if (U_FAILURE(status))
912 return false;
913 // Trim any unused characters.
914 if (!chars.resize(size))
915 return false;
916 RootedString ns(cx, chars.finishString());
917 if (!ns)
918 return false;
920 // Step 9.
921 args.rval().setString(ns);
922 return true;
923 }
924 #endif
926 bool
927 js_str_charAt(JSContext *cx, unsigned argc, Value *vp)
928 {
929 CallArgs args = CallArgsFromVp(argc, vp);
931 RootedString str(cx);
932 size_t i;
933 if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
934 str = args.thisv().toString();
935 i = size_t(args[0].toInt32());
936 if (i >= str->length())
937 goto out_of_range;
938 } else {
939 str = ThisToStringForStringProto(cx, args);
940 if (!str)
941 return false;
943 double d = 0.0;
944 if (args.length() > 0 && !ToInteger(cx, args[0], &d))
945 return false;
947 if (d < 0 || str->length() <= d)
948 goto out_of_range;
949 i = size_t(d);
950 }
952 str = cx->staticStrings().getUnitStringForElement(cx, str, i);
953 if (!str)
954 return false;
955 args.rval().setString(str);
956 return true;
958 out_of_range:
959 args.rval().setString(cx->runtime()->emptyString);
960 return true;
961 }
963 bool
964 js_str_charCodeAt(JSContext *cx, unsigned argc, Value *vp)
965 {
966 CallArgs args = CallArgsFromVp(argc, vp);
968 RootedString str(cx);
969 size_t i;
970 if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
971 str = args.thisv().toString();
972 i = size_t(args[0].toInt32());
973 if (i >= str->length())
974 goto out_of_range;
975 } else {
976 str = ThisToStringForStringProto(cx, args);
977 if (!str)
978 return false;
980 double d = 0.0;
981 if (args.length() > 0 && !ToInteger(cx, args[0], &d))
982 return false;
984 if (d < 0 || str->length() <= d)
985 goto out_of_range;
986 i = size_t(d);
987 }
989 jschar c;
990 if (!str->getChar(cx, i, &c))
991 return false;
992 args.rval().setInt32(c);
993 return true;
995 out_of_range:
996 args.rval().setNaN();
997 return true;
998 }
1000 /*
1001 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1002 * The patlen argument must be positive and no greater than sBMHPatLenMax.
1003 *
1004 * Return the index of pat in text, or -1 if not found.
1005 */
1006 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1007 static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */
1008 static const int sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */
1010 int
1011 js_BoyerMooreHorspool(const jschar *text, uint32_t textlen,
1012 const jschar *pat, uint32_t patlen)
1013 {
1014 uint8_t skip[sBMHCharSetSize];
1016 JS_ASSERT(0 < patlen && patlen <= sBMHPatLenMax);
1017 for (uint32_t i = 0; i < sBMHCharSetSize; i++)
1018 skip[i] = (uint8_t)patlen;
1019 uint32_t m = patlen - 1;
1020 for (uint32_t i = 0; i < m; i++) {
1021 jschar c = pat[i];
1022 if (c >= sBMHCharSetSize)
1023 return sBMHBadPattern;
1024 skip[c] = (uint8_t)(m - i);
1025 }
1026 jschar c;
1027 for (uint32_t k = m;
1028 k < textlen;
1029 k += ((c = text[k]) >= sBMHCharSetSize) ? patlen : skip[c]) {
1030 for (uint32_t i = k, j = m; ; i--, j--) {
1031 if (text[i] != pat[j])
1032 break;
1033 if (j == 0)
1034 return static_cast<int>(i); /* safe: max string size */
1035 }
1036 }
1037 return -1;
1038 }
1040 struct MemCmp {
1041 typedef uint32_t Extent;
1042 static MOZ_ALWAYS_INLINE Extent computeExtent(const jschar *, uint32_t patlen) {
1043 return (patlen - 1) * sizeof(jschar);
1044 }
1045 static MOZ_ALWAYS_INLINE bool match(const jschar *p, const jschar *t, Extent extent) {
1046 return memcmp(p, t, extent) == 0;
1047 }
1048 };
1050 struct ManualCmp {
1051 typedef const jschar *Extent;
1052 static MOZ_ALWAYS_INLINE Extent computeExtent(const jschar *pat, uint32_t patlen) {
1053 return pat + patlen;
1054 }
1055 static MOZ_ALWAYS_INLINE bool match(const jschar *p, const jschar *t, Extent extent) {
1056 for (; p != extent; ++p, ++t) {
1057 if (*p != *t)
1058 return false;
1059 }
1060 return true;
1061 }
1062 };
/*
 * Search text:textlen for pat:patlen by scanning for the first pattern
 * character eight candidates at a time, and checking the rest of the pattern
 * with InnerMatch::match whenever that first character is found.
 *
 * Returns the index of the first match, or -1 if there is none. Both lengths
 * must be positive (asserted below). InnerMatch supplies the Extent type,
 * computeExtent() and match(), as MemCmp and ManualCmp do.
 */
1064 template <class InnerMatch>
1065 static int
1066 UnrolledMatch(const jschar *text, uint32_t textlen, const jschar *pat, uint32_t patlen)
1067 {
1068 JS_ASSERT(patlen > 0 && textlen > 0);
/* One past the last position at which a match could still start. */
1069 const jschar *textend = text + textlen - (patlen - 1);
1070 const jschar p0 = *pat;
1071 const jschar *const patNext = pat + 1;
1072 const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
/*
 * Set on every jump to |match|: the number of candidates of the current group
 * of eight, counting the one just tested, that the do-while below still has to
 * account for before the unrolled loop can resume.
 */
1073 uint8_t fixup;
1075 const jschar *t = text;
/*
 * Handle the leading (textend - t) & 7 candidates (a full eight when that is
 * zero) through deliberate case fall-through, so that the loop below can always
 * advance by eight.
 */
1076 switch ((textend - t) & 7) {
1077 case 0: if (*t++ == p0) { fixup = 8; goto match; }
1078 case 7: if (*t++ == p0) { fixup = 7; goto match; }
1079 case 6: if (*t++ == p0) { fixup = 6; goto match; }
1080 case 5: if (*t++ == p0) { fixup = 5; goto match; }
1081 case 4: if (*t++ == p0) { fixup = 4; goto match; }
1082 case 3: if (*t++ == p0) { fixup = 3; goto match; }
1083 case 2: if (*t++ == p0) { fixup = 2; goto match; }
1084 case 1: if (*t++ == p0) { fixup = 1; goto match; }
1085 }
1086 while (t != textend) {
/* Unrolled scan: on a hit, leave |t| just past the matching character. */
1087 if (t[0] == p0) { t += 1; fixup = 8; goto match; }
1088 if (t[1] == p0) { t += 2; fixup = 7; goto match; }
1089 if (t[2] == p0) { t += 3; fixup = 6; goto match; }
1090 if (t[3] == p0) { t += 4; fixup = 5; goto match; }
1091 if (t[4] == p0) { t += 5; fixup = 4; goto match; }
1092 if (t[5] == p0) { t += 6; fixup = 3; goto match; }
1093 if (t[6] == p0) { t += 7; fixup = 2; goto match; }
1094 if (t[7] == p0) { t += 8; fixup = 1; goto match; }
1095 t += 8;
1096 continue;
/*
 * Only reachable through the |match| label. After a failed full comparison the
 * remaining candidates of the current group are tested one at a time, with
 * |fixup| counting them down, before control returns to the while condition.
 */
1097 do {
1098 if (*t++ == p0) {
1099 match:
/* |t| points just past the matched first character; compare the pattern tail. */
1100 if (!InnerMatch::match(patNext, t, extent))
1101 goto failed_match;
1102 return t - text - 1;
1103 }
1104 failed_match:;
1105 } while (--fixup > 0);
1106 }
1107 return -1;
1108 }
1110 static MOZ_ALWAYS_INLINE int
1111 StringMatch(const jschar *text, uint32_t textlen,
1112 const jschar *pat, uint32_t patlen)
1113 {
1114 if (patlen == 0)
1115 return 0;
1116 if (textlen < patlen)
1117 return -1;
1119 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1120 /*
1121 * Given enough registers, the unrolled loop below is faster than the
1122 * following loop. 32-bit x86 does not have enough registers.
1123 */
1124 if (patlen == 1) {
1125 const jschar p0 = *pat;
1126 for (const jschar *c = text, *end = text + textlen; c != end; ++c) {
1127 if (*c == p0)
1128 return c - text;
1129 }
1130 return -1;
1131 }
1132 #endif
1134 /*
1135 * If the text or pattern string is short, BMH will be more expensive than
1136 * the basic linear scan due to initialization cost and a more complex loop
1137 * body. While the correct threshold is input-dependent, we can make a few
1138 * conservative observations:
1139 * - When |textlen| is "big enough", the initialization time will be
1140 * proportionally small, so the worst-case slowdown is minimized.
1141 * - When |patlen| is "too small", even the best case for BMH will be
1142 * slower than a simple scan for large |textlen| due to the more complex
1143 * loop body of BMH.
1144 * From this, the values for "big enough" and "too small" are determined
1145 * empirically. See bug 526348.
1146 */
1147 if (textlen >= 512 && patlen >= 11 && patlen <= sBMHPatLenMax) {
1148 int index = js_BoyerMooreHorspool(text, textlen, pat, patlen);
1149 if (index != sBMHBadPattern)
1150 return index;
1151 }
1153 /*
1154 * For big patterns with large potential overlap we want the SIMD-optimized
1155 * speed of memcmp. For small patterns, a simple loop is faster.
1156 *
1157 * FIXME: Linux memcmp performance is sad and the manual loop is faster.
1158 */
1159 return
1160 #if !defined(__linux__)
1161 patlen > 128 ? UnrolledMatch<MemCmp>(text, textlen, pat, patlen)
1162 :
1163 #endif
1164 UnrolledMatch<ManualCmp>(text, textlen, pat, patlen);
1165 }
1167 static const size_t sRopeMatchThresholdRatioLog2 = 5;
1169 bool
1170 js::StringHasPattern(const jschar *text, uint32_t textlen,
1171 const jschar *pat, uint32_t patlen)
1172 {
1173 return StringMatch(text, textlen, pat, patlen) != -1;
1174 }
1176 // When an algorithm does not need a string represented as a single linear
1177 // array of characters, this range utility may be used to traverse the string a
1178 // sequence of linear arrays of characters. This avoids flattening ropes.
1179 class StringSegmentRange
1180 {
1181 // If malloc() shows up in any profiles from this vector, we can add a new
1182 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1183 AutoStringVector stack;
1184 Rooted<JSLinearString*> cur;
1186 bool settle(JSString *str) {
1187 while (str->isRope()) {
1188 JSRope &rope = str->asRope();
1189 if (!stack.append(rope.rightChild()))
1190 return false;
1191 str = rope.leftChild();
1192 }
1193 cur = &str->asLinear();
1194 return true;
1195 }
1197 public:
1198 StringSegmentRange(JSContext *cx)
1199 : stack(cx), cur(cx)
1200 {}
1202 MOZ_WARN_UNUSED_RESULT bool init(JSString *str) {
1203 JS_ASSERT(stack.empty());
1204 return settle(str);
1205 }
1207 bool empty() const {
1208 return cur == nullptr;
1209 }
1211 JSLinearString *front() const {
1212 JS_ASSERT(!cur->isRope());
1213 return cur;
1214 }
1216 MOZ_WARN_UNUSED_RESULT bool popFront() {
1217 JS_ASSERT(!empty());
1218 if (stack.empty()) {
1219 cur = nullptr;
1220 return true;
1221 }
1222 return settle(stack.popCopy());
1223 }
1224 };
1226 /*
1227 * RopeMatch takes the text to search and the pattern to search for in the text.
1228 * RopeMatch returns false on OOM and otherwise returns the match index through
1229 * the 'match' outparam (-1 for not found).
1230 */
1231 static bool
1232 RopeMatch(JSContext *cx, JSString *textstr, const jschar *pat, uint32_t patlen, int *match)
1233 {
1234 JS_ASSERT(textstr->isRope());
1236 if (patlen == 0) {
1237 *match = 0;
1238 return true;
1239 }
1240 if (textstr->length() < patlen) {
1241 *match = -1;
1242 return true;
1243 }
1245 /*
1246 * List of leaf nodes in the rope. If we run out of memory when trying to
1247 * append to this list, we can still fall back to StringMatch, so use the
1248 * system allocator so we don't report OOM in that case.
1249 */
1250 Vector<JSLinearString *, 16, SystemAllocPolicy> strs;
1252 /*
1253 * We don't want to do rope matching if there is a poor node-to-char ratio,
1254 * since this means spending a lot of time in the match loop below. We also
1255 * need to build the list of leaf nodes. Do both here: iterate over the
1256 * nodes so long as there are not too many.
1257 */
1258 {
1259 size_t textstrlen = textstr->length();
1260 size_t threshold = textstrlen >> sRopeMatchThresholdRatioLog2;
1261 StringSegmentRange r(cx);
1262 if (!r.init(textstr))
1263 return false;
1264 while (!r.empty()) {
1265 if (threshold-- == 0 || !strs.append(r.front())) {
1266 const jschar *chars = textstr->getChars(cx);
1267 if (!chars)
1268 return false;
1269 *match = StringMatch(chars, textstrlen, pat, patlen);
1270 return true;
1271 }
1272 if (!r.popFront())
1273 return false;
1274 }
1275 }
1277 /* Absolute offset from the beginning of the logical string textstr. */
1278 int pos = 0;
1280 for (JSLinearString **outerp = strs.begin(); outerp != strs.end(); ++outerp) {
1281 /* Try to find a match within 'outer'. */
1282 JSLinearString *outer = *outerp;
1283 const jschar *chars = outer->chars();
1284 size_t len = outer->length();
1285 int matchResult = StringMatch(chars, len, pat, patlen);
1286 if (matchResult != -1) {
1287 /* Matched! */
1288 *match = pos + matchResult;
1289 return true;
1290 }
1292 /* Try to find a match starting in 'outer' and running into other nodes. */
1293 const jschar *const text = chars + (patlen > len ? 0 : len - patlen + 1);
1294 const jschar *const textend = chars + len;
1295 const jschar p0 = *pat;
1296 const jschar *const p1 = pat + 1;
1297 const jschar *const patend = pat + patlen;
1298 for (const jschar *t = text; t != textend; ) {
1299 if (*t++ != p0)
1300 continue;
1301 JSLinearString **innerp = outerp;
1302 const jschar *ttend = textend;
1303 for (const jschar *pp = p1, *tt = t; pp != patend; ++pp, ++tt) {
1304 while (tt == ttend) {
1305 if (++innerp == strs.end()) {
1306 *match = -1;
1307 return true;
1308 }
1309 JSLinearString *inner = *innerp;
1310 tt = inner->chars();
1311 ttend = tt + inner->length();
1312 }
1313 if (*pp != *tt)
1314 goto break_continue;
1315 }
1317 /* Matched! */
1318 *match = pos + (t - chars) - 1; /* -1 because of *t++ above */
1319 return true;
1321 break_continue:;
1322 }
1324 pos += len;
1325 }
1327 *match = -1;
1328 return true;
1329 }
1331 /* ES6 20121026 draft 15.5.4.24. */
1332 static bool
1333 str_contains(JSContext *cx, unsigned argc, Value *vp)
1334 {
1335 CallArgs args = CallArgsFromVp(argc, vp);
1337 // Steps 1, 2, and 3
1338 RootedString str(cx, ThisToStringForStringProto(cx, args));
1339 if (!str)
1340 return false;
1342 // Steps 4 and 5
1343 Rooted<JSLinearString*> searchStr(cx, ArgToRootedString(cx, args, 0));
1344 if (!searchStr)
1345 return false;
1347 // Steps 6 and 7
1348 uint32_t pos = 0;
1349 if (args.hasDefined(1)) {
1350 if (args[1].isInt32()) {
1351 int i = args[1].toInt32();
1352 pos = (i < 0) ? 0U : uint32_t(i);
1353 } else {
1354 double d;
1355 if (!ToInteger(cx, args[1], &d))
1356 return false;
1357 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1358 }
1359 }
1361 // Step 8
1362 uint32_t textLen = str->length();
1363 const jschar *textChars = str->getChars(cx);
1364 if (!textChars)
1365 return false;
1367 // Step 9
1368 uint32_t start = Min(Max(pos, 0U), textLen);
1370 // Step 10
1371 uint32_t searchLen = searchStr->length();
1372 const jschar *searchChars = searchStr->chars();
1374 // Step 11
1375 textChars += start;
1376 textLen -= start;
1377 int match = StringMatch(textChars, textLen, searchChars, searchLen);
1378 args.rval().setBoolean(match != -1);
1379 return true;
1380 }
1382 /* ES6 20120927 draft 15.5.4.7. */
1383 static bool
1384 str_indexOf(JSContext *cx, unsigned argc, Value *vp)
1385 {
1386 CallArgs args = CallArgsFromVp(argc, vp);
1388 // Steps 1, 2, and 3
1389 RootedString str(cx, ThisToStringForStringProto(cx, args));
1390 if (!str)
1391 return false;
1393 // Steps 4 and 5
1394 Rooted<JSLinearString*> searchStr(cx, ArgToRootedString(cx, args, 0));
1395 if (!searchStr)
1396 return false;
1398 // Steps 6 and 7
1399 uint32_t pos = 0;
1400 if (args.hasDefined(1)) {
1401 if (args[1].isInt32()) {
1402 int i = args[1].toInt32();
1403 pos = (i < 0) ? 0U : uint32_t(i);
1404 } else {
1405 double d;
1406 if (!ToInteger(cx, args[1], &d))
1407 return false;
1408 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1409 }
1410 }
1412 // Step 8
1413 uint32_t textLen = str->length();
1414 const jschar *textChars = str->getChars(cx);
1415 if (!textChars)
1416 return false;
1418 // Step 9
1419 uint32_t start = Min(Max(pos, 0U), textLen);
1421 // Step 10
1422 uint32_t searchLen = searchStr->length();
1423 const jschar *searchChars = searchStr->chars();
1425 // Step 11
1426 textChars += start;
1427 textLen -= start;
1428 int match = StringMatch(textChars, textLen, searchChars, searchLen);
1429 args.rval().setInt32((match == -1) ? -1 : start + match);
1430 return true;
1431 }
1433 static bool
1434 str_lastIndexOf(JSContext *cx, unsigned argc, Value *vp)
1435 {
1436 CallArgs args = CallArgsFromVp(argc, vp);
1437 RootedString textstr(cx, ThisToStringForStringProto(cx, args));
1438 if (!textstr)
1439 return false;
1441 size_t textlen = textstr->length();
1443 Rooted<JSLinearString*> patstr(cx, ArgToRootedString(cx, args, 0));
1444 if (!patstr)
1445 return false;
1447 size_t patlen = patstr->length();
1449 int i = textlen - patlen; // Start searching here
1450 if (i < 0) {
1451 args.rval().setInt32(-1);
1452 return true;
1453 }
1455 if (args.length() > 1) {
1456 if (args[1].isInt32()) {
1457 int j = args[1].toInt32();
1458 if (j <= 0)
1459 i = 0;
1460 else if (j < i)
1461 i = j;
1462 } else {
1463 double d;
1464 if (!ToNumber(cx, args[1], &d))
1465 return false;
1466 if (!IsNaN(d)) {
1467 d = ToInteger(d);
1468 if (d <= 0)
1469 i = 0;
1470 else if (d < i)
1471 i = (int)d;
1472 }
1473 }
1474 }
1476 if (patlen == 0) {
1477 args.rval().setInt32(i);
1478 return true;
1479 }
1481 const jschar *text = textstr->getChars(cx);
1482 if (!text)
1483 return false;
1485 const jschar *pat = patstr->chars();
1487 const jschar *t = text + i;
1488 const jschar *textend = text - 1;
1489 const jschar p0 = *pat;
1490 const jschar *patNext = pat + 1;
1491 const jschar *patEnd = pat + patlen;
1493 for (; t != textend; --t) {
1494 if (*t == p0) {
1495 const jschar *t1 = t + 1;
1496 for (const jschar *p1 = patNext; p1 != patEnd; ++p1, ++t1) {
1497 if (*t1 != *p1)
1498 goto break_continue;
1499 }
1500 args.rval().setInt32(t - text);
1501 return true;
1502 }
1503 break_continue:;
1504 }
1506 args.rval().setInt32(-1);
1507 return true;
1508 }
1510 /* ES6 20131108 draft 21.1.3.18. */
1511 static bool
1512 str_startsWith(JSContext *cx, unsigned argc, Value *vp)
1513 {
1514 CallArgs args = CallArgsFromVp(argc, vp);
1516 // Steps 1, 2, and 3
1517 RootedString str(cx, ThisToStringForStringProto(cx, args));
1518 if (!str)
1519 return false;
1521 // Step 4
1522 if (args.get(0).isObject() && IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
1523 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1524 "first", "", "Regular Expression");
1525 return false;
1526 }
1528 // Steps 5 and 6
1529 Rooted<JSLinearString*> searchStr(cx, ArgToRootedString(cx, args, 0));
1530 if (!searchStr)
1531 return false;
1533 // Steps 7 and 8
1534 uint32_t pos = 0;
1535 if (args.hasDefined(1)) {
1536 if (args[1].isInt32()) {
1537 int i = args[1].toInt32();
1538 pos = (i < 0) ? 0U : uint32_t(i);
1539 } else {
1540 double d;
1541 if (!ToInteger(cx, args[1], &d))
1542 return false;
1543 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1544 }
1545 }
1547 // Step 9
1548 uint32_t textLen = str->length();
1549 const jschar *textChars = str->getChars(cx);
1550 if (!textChars)
1551 return false;
1553 // Step 10
1554 uint32_t start = Min(Max(pos, 0U), textLen);
1556 // Step 11
1557 uint32_t searchLen = searchStr->length();
1558 const jschar *searchChars = searchStr->chars();
1560 // Step 12
1561 if (searchLen + start < searchLen || searchLen + start > textLen) {
1562 args.rval().setBoolean(false);
1563 return true;
1564 }
1566 // Steps 13 and 14
1567 args.rval().setBoolean(PodEqual(textChars + start, searchChars, searchLen));
1568 return true;
1569 }
1571 /* ES6 20131108 draft 21.1.3.7. */
1572 static bool
1573 str_endsWith(JSContext *cx, unsigned argc, Value *vp)
1574 {
1575 CallArgs args = CallArgsFromVp(argc, vp);
1577 // Steps 1, 2, and 3
1578 RootedString str(cx, ThisToStringForStringProto(cx, args));
1579 if (!str)
1580 return false;
1582 // Step 4
1583 if (args.get(0).isObject() && IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
1584 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1585 "first", "", "Regular Expression");
1586 return false;
1587 }
1589 // Steps 5 and 6
1590 Rooted<JSLinearString *> searchStr(cx, ArgToRootedString(cx, args, 0));
1591 if (!searchStr)
1592 return false;
1594 // Step 7
1595 uint32_t textLen = str->length();
1596 const jschar *textChars = str->getChars(cx);
1597 if (!textChars)
1598 return false;
1600 // Steps 8 and 9
1601 uint32_t pos = textLen;
1602 if (args.hasDefined(1)) {
1603 if (args[1].isInt32()) {
1604 int i = args[1].toInt32();
1605 pos = (i < 0) ? 0U : uint32_t(i);
1606 } else {
1607 double d;
1608 if (!ToInteger(cx, args[1], &d))
1609 return false;
1610 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1611 }
1612 }
1614 // Step 10
1615 uint32_t end = Min(Max(pos, 0U), textLen);
1617 // Step 11
1618 uint32_t searchLen = searchStr->length();
1619 const jschar *searchChars = searchStr->chars();
1621 // Step 13 (reordered)
1622 if (searchLen > end) {
1623 args.rval().setBoolean(false);
1624 return true;
1625 }
1627 // Step 12
1628 uint32_t start = end - searchLen;
1630 // Steps 14 and 15
1631 args.rval().setBoolean(PodEqual(textChars + start, searchChars, searchLen));
1632 return true;
1633 }
1635 static bool
1636 js_TrimString(JSContext *cx, Value *vp, bool trimLeft, bool trimRight)
1637 {
1638 CallReceiver call = CallReceiverFromVp(vp);
1639 RootedString str(cx, ThisToStringForStringProto(cx, call));
1640 if (!str)
1641 return false;
1642 size_t length = str->length();
1643 const jschar *chars = str->getChars(cx);
1644 if (!chars)
1645 return false;
1647 size_t begin = 0;
1648 size_t end = length;
1650 if (trimLeft) {
1651 while (begin < length && unicode::IsSpace(chars[begin]))
1652 ++begin;
1653 }
1655 if (trimRight) {
1656 while (end > begin && unicode::IsSpace(chars[end - 1]))
1657 --end;
1658 }
1660 str = js_NewDependentString(cx, str, begin, end - begin);
1661 if (!str)
1662 return false;
1664 call.rval().setString(str);
1665 return true;
1666 }
1668 static bool
1669 str_trim(JSContext *cx, unsigned argc, Value *vp)
1670 {
1671 return js_TrimString(cx, vp, true, true);
1672 }
1674 static bool
1675 str_trimLeft(JSContext *cx, unsigned argc, Value *vp)
1676 {
1677 return js_TrimString(cx, vp, true, false);
1678 }
1680 static bool
1681 str_trimRight(JSContext *cx, unsigned argc, Value *vp)
1682 {
1683 return js_TrimString(cx, vp, false, true);
1684 }
1686 /*
1687 * Perl-inspired string functions.
1688 */
1690 namespace {
1692 /* Result of a successfully performed flat match. */
1693 class FlatMatch
1694 {
1695 RootedAtom patstr;
1696 const jschar *pat;
1697 size_t patlen;
1698 int32_t match_;
1700 friend class StringRegExpGuard;
1702 public:
1703 FlatMatch(JSContext *cx) : patstr(cx) {}
1704 JSLinearString *pattern() const { return patstr; }
1705 size_t patternLength() const { return patlen; }
1707 /*
1708 * Note: The match is -1 when the match is performed successfully,
1709 * but no match is found.
1710 */
1711 int32_t match() const { return match_; }
1712 };
1714 } /* anonymous namespace */
1716 static inline bool
1717 IsRegExpMetaChar(jschar c)
1718 {
1719 switch (c) {
1720 /* Taken from the PatternCharacter production in 15.10.1. */
1721 case '^': case '$': case '\\': case '.': case '*': case '+':
1722 case '?': case '(': case ')': case '[': case ']': case '{':
1723 case '}': case '|':
1724 return true;
1725 default:
1726 return false;
1727 }
1728 }
1730 static inline bool
1731 HasRegExpMetaChars(const jschar *chars, size_t length)
1732 {
1733 for (size_t i = 0; i < length; ++i) {
1734 if (IsRegExpMetaChar(chars[i]))
1735 return true;
1736 }
1737 return false;
1738 }
1740 namespace {
1742 /*
1743 * StringRegExpGuard factors logic out of String regexp operations.
1744 *
1745 * |optarg| indicates in which argument position RegExp flags will be found, if
1746 * present. This is a Mozilla extension and not part of any ECMA spec.
1747 */
1748 class MOZ_STACK_CLASS StringRegExpGuard
1749 {
1750 RegExpGuard re_;
1751 FlatMatch fm;
1752 RootedObject obj_;
1754 /*
1755 * Upper bound on the number of characters we are willing to potentially
1756 * waste on searching for RegExp meta-characters.
1757 */
1758 static const size_t MAX_FLAT_PAT_LEN = 256;
1760 static JSAtom *
1761 flattenPattern(JSContext *cx, JSAtom *patstr)
1762 {
1763 StringBuffer sb(cx);
1764 if (!sb.reserve(patstr->length()))
1765 return nullptr;
1767 static const jschar ESCAPE_CHAR = '\\';
1768 const jschar *chars = patstr->chars();
1769 size_t len = patstr->length();
1770 for (const jschar *it = chars; it != chars + len; ++it) {
1771 if (IsRegExpMetaChar(*it)) {
1772 if (!sb.append(ESCAPE_CHAR) || !sb.append(*it))
1773 return nullptr;
1774 } else {
1775 if (!sb.append(*it))
1776 return nullptr;
1777 }
1778 }
1779 return sb.finishAtom();
1780 }
1782 public:
1783 StringRegExpGuard(JSContext *cx)
1784 : re_(cx), fm(cx), obj_(cx)
1785 { }
1787 /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
1788 bool init(JSContext *cx, CallArgs args, bool convertVoid = false)
1789 {
1790 if (args.length() != 0 && IsObjectWithClass(args[0], ESClass_RegExp, cx))
1791 return init(cx, &args[0].toObject());
1793 if (convertVoid && !args.hasDefined(0)) {
1794 fm.patstr = cx->runtime()->emptyString;
1795 return true;
1796 }
1798 JSString *arg = ArgToRootedString(cx, args, 0);
1799 if (!arg)
1800 return false;
1802 fm.patstr = AtomizeString(cx, arg);
1803 if (!fm.patstr)
1804 return false;
1806 return true;
1807 }
1809 bool init(JSContext *cx, JSObject *regexp) {
1810 obj_ = regexp;
1812 JS_ASSERT(ObjectClassIs(obj_, ESClass_RegExp, cx));
1814 if (!RegExpToShared(cx, obj_, &re_))
1815 return false;
1816 return true;
1817 }
1819 bool init(JSContext *cx, HandleString pattern) {
1820 fm.patstr = AtomizeString(cx, pattern);
1821 if (!fm.patstr)
1822 return false;
1823 return true;
1824 }
1826 /*
1827 * Attempt to match |patstr| to |textstr|. A flags argument, metachars in
1828 * the pattern string, or a lengthy pattern string can thwart this process.
1829 *
1830 * |checkMetaChars| looks for regexp metachars in the pattern string.
1831 *
1832 * Return whether flat matching could be used.
1833 *
1834 * N.B. tryFlatMatch returns nullptr on OOM, so the caller must check
1835 * cx->isExceptionPending().
1836 */
1837 const FlatMatch *
1838 tryFlatMatch(JSContext *cx, JSString *textstr, unsigned optarg, unsigned argc,
1839 bool checkMetaChars = true)
1840 {
1841 if (re_.initialized())
1842 return nullptr;
1844 fm.pat = fm.patstr->chars();
1845 fm.patlen = fm.patstr->length();
1847 if (optarg < argc)
1848 return nullptr;
1850 if (checkMetaChars &&
1851 (fm.patlen > MAX_FLAT_PAT_LEN || HasRegExpMetaChars(fm.pat, fm.patlen))) {
1852 return nullptr;
1853 }
1855 /*
1856 * textstr could be a rope, so we want to avoid flattening it for as
1857 * long as possible.
1858 */
1859 if (textstr->isRope()) {
1860 if (!RopeMatch(cx, textstr, fm.pat, fm.patlen, &fm.match_))
1861 return nullptr;
1862 } else {
1863 const jschar *text = textstr->asLinear().chars();
1864 size_t textlen = textstr->length();
1865 fm.match_ = StringMatch(text, textlen, fm.pat, fm.patlen);
1866 }
1867 return &fm;
1868 }
1870 /* If the pattern is not already a regular expression, make it so. */
1871 bool normalizeRegExp(JSContext *cx, bool flat, unsigned optarg, CallArgs args)
1872 {
1873 if (re_.initialized())
1874 return true;
1876 /* Build RegExp from pattern string. */
1877 RootedString opt(cx);
1878 if (optarg < args.length()) {
1879 opt = ToString<CanGC>(cx, args[optarg]);
1880 if (!opt)
1881 return false;
1882 } else {
1883 opt = nullptr;
1884 }
1886 Rooted<JSAtom *> patstr(cx);
1887 if (flat) {
1888 patstr = flattenPattern(cx, fm.patstr);
1889 if (!patstr)
1890 return false;
1891 } else {
1892 patstr = fm.patstr;
1893 }
1894 JS_ASSERT(patstr);
1896 return cx->compartment()->regExps.get(cx, patstr, opt, &re_);
1897 }
1899 bool zeroLastIndex(JSContext *cx) {
1900 if (!regExpIsObject())
1901 return true;
1903 // Use a fast path for same-global RegExp objects with writable
1904 // lastIndex.
1905 if (obj_->is<RegExpObject>() && obj_->nativeLookup(cx, cx->names().lastIndex)->writable()) {
1906 obj_->as<RegExpObject>().zeroLastIndex();
1907 return true;
1908 }
1910 // Handle everything else generically (including throwing if .lastIndex is non-writable).
1911 RootedValue zero(cx, Int32Value(0));
1912 return JSObject::setProperty(cx, obj_, obj_, cx->names().lastIndex, &zero, true);
1913 }
1915 RegExpShared ®Exp() { return *re_; }
1917 bool regExpIsObject() { return obj_ != nullptr; }
1918 HandleObject regExpObject() {
1919 JS_ASSERT(regExpIsObject());
1920 return obj_;
1921 }
1923 private:
1924 StringRegExpGuard(const StringRegExpGuard &) MOZ_DELETE;
1925 void operator=(const StringRegExpGuard &) MOZ_DELETE;
1926 };
1928 } /* anonymous namespace */
1930 static bool
1931 DoMatchLocal(JSContext *cx, CallArgs args, RegExpStatics *res, Handle<JSLinearString*> input,
1932 RegExpShared &re)
1933 {
1934 size_t charsLen = input->length();
1935 const jschar *chars = input->chars();
1937 size_t i = 0;
1938 ScopedMatchPairs matches(&cx->tempLifoAlloc());
1939 RegExpRunStatus status = re.execute(cx, chars, charsLen, &i, matches);
1940 if (status == RegExpRunStatus_Error)
1941 return false;
1943 if (status == RegExpRunStatus_Success_NotFound) {
1944 args.rval().setNull();
1945 return true;
1946 }
1948 if (!res->updateFromMatchPairs(cx, input, matches))
1949 return false;
1951 RootedValue rval(cx);
1952 if (!CreateRegExpMatchResult(cx, input, matches, &rval))
1953 return false;
1955 args.rval().set(rval);
1956 return true;
1957 }
1959 /* ES5 15.5.4.10 step 8. */
1960 static bool
1961 DoMatchGlobal(JSContext *cx, CallArgs args, RegExpStatics *res, Handle<JSLinearString*> input,
1962 StringRegExpGuard &g)
1963 {
1964 // Step 8a.
1965 //
1966 // This single zeroing of "lastIndex" covers all "lastIndex" changes in the
1967 // rest of String.prototype.match, particularly in steps 8f(i) and
1968 // 8f(iii)(2)(a). Here's why.
1969 //
1970 // The inputs to the calls to RegExp.prototype.exec are a RegExp object
1971 // whose .global is true and a string. The only side effect of a call in
1972 // these circumstances is that the RegExp's .lastIndex will be modified to
1973 // the next starting index after the discovered match (or to 0 if there's
1974 // no remaining match). Because .lastIndex is a non-configurable data
1975 // property and no script-controllable code executes after step 8a, passing
1976 // step 8a implies *every* .lastIndex set succeeds. String.prototype.match
1977 // calls RegExp.prototype.exec repeatedly, and the last call doesn't match,
1978 // so the final value of .lastIndex is 0: exactly the state after step 8a
1979 // succeeds. No spec step lets script observe intermediate .lastIndex
1980 // values.
1981 //
1982 // The arrays returned by RegExp.prototype.exec always have a string at
1983 // index 0, for which [[Get]]s have no side effects.
1984 //
1985 // Filling in a new array using [[DefineOwnProperty]] is unobservable.
1986 //
1987 // This is a tricky point, because after this set, our implementation *can*
1988 // fail. The key is that script can't distinguish these failure modes from
1989 // one where, in spec terms, we fail immediately after step 8a. That *in
1990 // reality* we might have done extra matching work, or created a partial
1991 // results array to return, or hit an interrupt, is irrelevant. The
1992 // script can't tell we did any of those things but didn't update
1993 // .lastIndex. Thus we can optimize steps 8b onward however we want,
1994 // including eliminating intermediate .lastIndex sets, as long as we don't
1995 // add ways for script to observe the intermediate states.
1996 //
1997 // In short: it's okay to cheat (by setting .lastIndex to 0, once) because
1998 // we can't get caught.
1999 if (!g.zeroLastIndex(cx))
2000 return false;
2002 // Step 8b.
2003 AutoValueVector elements(cx);
2005 size_t lastSuccessfulStart = 0;
2007 // The loop variables from steps 8c-e aren't needed, as we use different
2008 // techniques from the spec to implement step 8f's loop.
2010 // Step 8f.
2011 MatchPair match;
2012 size_t charsLen = input->length();
2013 const jschar *chars = input->chars();
2014 RegExpShared &re = g.regExp();
2015 for (size_t searchIndex = 0; searchIndex <= charsLen; ) {
2016 if (!CheckForInterrupt(cx))
2017 return false;
2019 // Steps 8f(i-ii), minus "lastIndex" updates (see above).
2020 size_t nextSearchIndex = searchIndex;
2021 RegExpRunStatus status = re.executeMatchOnly(cx, chars, charsLen, &nextSearchIndex, match);
2022 if (status == RegExpRunStatus_Error)
2023 return false;
2025 // Step 8f(ii).
2026 if (status == RegExpRunStatus_Success_NotFound)
2027 break;
2029 lastSuccessfulStart = searchIndex;
2031 // Steps 8f(iii)(1-3).
2032 searchIndex = match.isEmpty() ? nextSearchIndex + 1 : nextSearchIndex;
2034 // Step 8f(iii)(4-5).
2035 JSLinearString *str = js_NewDependentString(cx, input, match.start, match.length());
2036 if (!str)
2037 return false;
2038 if (!elements.append(StringValue(str)))
2039 return false;
2040 }
2042 // Step 8g.
2043 if (elements.empty()) {
2044 args.rval().setNull();
2045 return true;
2046 }
2048 // The last *successful* match updates the RegExpStatics. (Interestingly,
2049 // this implies that String.prototype.match's semantics aren't those
2050 // implied by the RegExp.prototype.exec calls in the ES5 algorithm.)
2051 res->updateLazily(cx, input, &re, lastSuccessfulStart);
2053 // Steps 8b, 8f(iii)(5-6), 8h.
2054 JSObject *array = NewDenseCopiedArray(cx, elements.length(), elements.begin());
2055 if (!array)
2056 return false;
2058 args.rval().setObject(*array);
2059 return true;
2060 }
2062 static bool
2063 BuildFlatMatchArray(JSContext *cx, HandleString textstr, const FlatMatch &fm, CallArgs *args)
2064 {
2065 if (fm.match() < 0) {
2066 args->rval().setNull();
2067 return true;
2068 }
2070 /* For this non-global match, produce a RegExp.exec-style array. */
2071 RootedObject obj(cx, NewDenseEmptyArray(cx));
2072 if (!obj)
2073 return false;
2075 RootedValue patternVal(cx, StringValue(fm.pattern()));
2076 RootedValue matchVal(cx, Int32Value(fm.match()));
2077 RootedValue textVal(cx, StringValue(textstr));
2079 if (!JSObject::defineElement(cx, obj, 0, patternVal) ||
2080 !JSObject::defineProperty(cx, obj, cx->names().index, matchVal) ||
2081 !JSObject::defineProperty(cx, obj, cx->names().input, textVal))
2082 {
2083 return false;
2084 }
2086 args->rval().setObject(*obj);
2087 return true;
2088 }
2090 /* ES5 15.5.4.10. */
2091 bool
2092 js::str_match(JSContext *cx, unsigned argc, Value *vp)
2093 {
2094 CallArgs args = CallArgsFromVp(argc, vp);
2096 /* Steps 1-2. */
2097 RootedString str(cx, ThisToStringForStringProto(cx, args));
2098 if (!str)
2099 return false;
2101 /* Steps 3-4, plus the trailing-argument "flags" extension. */
2102 StringRegExpGuard g(cx);
2103 if (!g.init(cx, args, true))
2104 return false;
2106 /* Fast path when the search pattern can be searched for as a string. */
2107 if (const FlatMatch *fm = g.tryFlatMatch(cx, str, 1, args.length()))
2108 return BuildFlatMatchArray(cx, str, *fm, &args);
2110 /* Return if there was an error in tryFlatMatch. */
2111 if (cx->isExceptionPending())
2112 return false;
2114 /* Create regular-expression internals as needed to perform the match. */
2115 if (!g.normalizeRegExp(cx, false, 1, args))
2116 return false;
2118 RegExpStatics *res = cx->global()->getRegExpStatics();
2119 Rooted<JSLinearString*> linearStr(cx, str->ensureLinear(cx));
2120 if (!linearStr)
2121 return false;
2123 /* Steps 5-6, 7. */
2124 if (!g.regExp().global())
2125 return DoMatchLocal(cx, args, res, linearStr, g.regExp());
2127 /* Steps 6, 8. */
2128 return DoMatchGlobal(cx, args, res, linearStr, g);
2129 }
2131 bool
2132 js::str_search(JSContext *cx, unsigned argc, Value *vp)
2133 {
2134 CallArgs args = CallArgsFromVp(argc, vp);
2135 RootedString str(cx, ThisToStringForStringProto(cx, args));
2136 if (!str)
2137 return false;
2139 StringRegExpGuard g(cx);
2140 if (!g.init(cx, args, true))
2141 return false;
2142 if (const FlatMatch *fm = g.tryFlatMatch(cx, str, 1, args.length())) {
2143 args.rval().setInt32(fm->match());
2144 return true;
2145 }
2147 if (cx->isExceptionPending()) /* from tryFlatMatch */
2148 return false;
2150 if (!g.normalizeRegExp(cx, false, 1, args))
2151 return false;
2153 Rooted<JSLinearString*> linearStr(cx, str->ensureLinear(cx));
2154 if (!linearStr)
2155 return false;
2157 const jschar *chars = linearStr->chars();
2158 size_t length = linearStr->length();
2159 RegExpStatics *res = cx->global()->getRegExpStatics();
2161 /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
2162 size_t i = 0;
2163 MatchPair match;
2165 RegExpRunStatus status = g.regExp().executeMatchOnly(cx, chars, length, &i, match);
2166 if (status == RegExpRunStatus_Error)
2167 return false;
2169 if (status == RegExpRunStatus_Success)
2170 res->updateLazily(cx, linearStr, &g.regExp(), 0);
2172 JS_ASSERT_IF(status == RegExpRunStatus_Success_NotFound, match.start == -1);
2173 args.rval().setInt32(match.start);
2174 return true;
2175 }
2177 // Utility for building a rope (lazy concatenation) of strings.
2178 class RopeBuilder {
2179 JSContext *cx;
2180 RootedString res;
2182 RopeBuilder(const RopeBuilder &other) MOZ_DELETE;
2183 void operator=(const RopeBuilder &other) MOZ_DELETE;
2185 public:
2186 RopeBuilder(JSContext *cx)
2187 : cx(cx), res(cx, cx->runtime()->emptyString)
2188 {}
2190 inline bool append(HandleString str) {
2191 res = ConcatStrings<CanGC>(cx, res, str);
2192 return !!res;
2193 }
2195 inline JSString *result() {
2196 return res;
2197 }
2198 };
2200 namespace {
2202 struct ReplaceData
2203 {
2204 ReplaceData(JSContext *cx)
2205 : str(cx), g(cx), lambda(cx), elembase(cx), repstr(cx),
2206 fig(cx, NullValue()), sb(cx)
2207 {}
2209 inline void setReplacementString(JSLinearString *string) {
2210 JS_ASSERT(string);
2211 lambda = nullptr;
2212 elembase = nullptr;
2213 repstr = string;
2215 /* We're about to store pointers into the middle of our string. */
2216 dollarEnd = repstr->chars() + repstr->length();
2217 dollar = js_strchr_limit(repstr->chars(), '$', dollarEnd);
2218 }
2220 inline void setReplacementFunction(JSObject *func) {
2221 JS_ASSERT(func);
2222 lambda = func;
2223 elembase = nullptr;
2224 repstr = nullptr;
2225 dollar = dollarEnd = nullptr;
2226 }
2228 RootedString str; /* 'this' parameter object as a string */
2229 StringRegExpGuard g; /* regexp parameter object and private data */
2230 RootedObject lambda; /* replacement function object or null */
2231 RootedObject elembase; /* object for function(a){return b[a]} replace */
2232 Rooted<JSLinearString*> repstr; /* replacement string */
2233 const jschar *dollar; /* null or pointer to first $ in repstr */
2234 const jschar *dollarEnd; /* limit pointer for js_strchr_limit */
2235 int leftIndex; /* left context index in str->chars */
2236 JSSubString dollarStr; /* for "$$" InterpretDollar result */
2237 bool calledBack; /* record whether callback has been called */
2238 FastInvokeGuard fig; /* used for lambda calls, also holds arguments */
2239 StringBuffer sb; /* buffer built during DoMatch */
2240 };
2242 } /* anonymous namespace */
2244 static bool
2245 ReplaceRegExp(JSContext *cx, RegExpStatics *res, ReplaceData &rdata);
2247 static bool
2248 DoMatchForReplaceLocal(JSContext *cx, RegExpStatics *res, Handle<JSLinearString*> linearStr,
2249 RegExpShared &re, ReplaceData &rdata)
2250 {
2251 size_t charsLen = linearStr->length();
2252 size_t i = 0;
2253 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2254 RegExpRunStatus status = re.execute(cx, linearStr->chars(), charsLen, &i, matches);
2255 if (status == RegExpRunStatus_Error)
2256 return false;
2258 if (status == RegExpRunStatus_Success_NotFound)
2259 return true;
2261 if (!res->updateFromMatchPairs(cx, linearStr, matches))
2262 return false;
2264 return ReplaceRegExp(cx, res, rdata);
2265 }
2267 static bool
2268 DoMatchForReplaceGlobal(JSContext *cx, RegExpStatics *res, Handle<JSLinearString*> linearStr,
2269 RegExpShared &re, ReplaceData &rdata)
2270 {
2271 size_t charsLen = linearStr->length();
2272 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2273 for (size_t count = 0, i = 0; i <= charsLen; ++count) {
2274 if (!CheckForInterrupt(cx))
2275 return false;
2277 RegExpRunStatus status = re.execute(cx, linearStr->chars(), charsLen, &i, matches);
2278 if (status == RegExpRunStatus_Error)
2279 return false;
2281 if (status == RegExpRunStatus_Success_NotFound)
2282 break;
2284 if (!res->updateFromMatchPairs(cx, linearStr, matches))
2285 return false;
2287 if (!ReplaceRegExp(cx, res, rdata))
2288 return false;
2289 if (!res->matched())
2290 ++i;
2291 }
2293 return true;
2294 }
2296 static bool
2297 InterpretDollar(RegExpStatics *res, const jschar *dp, const jschar *ep,
2298 ReplaceData &rdata, JSSubString *out, size_t *skip)
2299 {
2300 JS_ASSERT(*dp == '$');
2302 /* If there is only a dollar, bail now */
2303 if (dp + 1 >= ep)
2304 return false;
2306 /* Interpret all Perl match-induced dollar variables. */
2307 jschar dc = dp[1];
2308 if (JS7_ISDEC(dc)) {
2309 /* ECMA-262 Edition 3: 1-9 or 01-99 */
2310 unsigned num = JS7_UNDEC(dc);
2311 if (num > res->getMatches().parenCount())
2312 return false;
2314 const jschar *cp = dp + 2;
2315 if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
2316 unsigned tmp = 10 * num + JS7_UNDEC(dc);
2317 if (tmp <= res->getMatches().parenCount()) {
2318 cp++;
2319 num = tmp;
2320 }
2321 }
2322 if (num == 0)
2323 return false;
2325 *skip = cp - dp;
2327 JS_ASSERT(num <= res->getMatches().parenCount());
2329 /*
2330 * Note: we index to get the paren with the (1-indexed) pair
2331 * number, as opposed to a (0-indexed) paren number.
2332 */
2333 res->getParen(num, out);
2334 return true;
2335 }
2337 *skip = 2;
2338 switch (dc) {
2339 case '$':
2340 rdata.dollarStr.chars = dp;
2341 rdata.dollarStr.length = 1;
2342 *out = rdata.dollarStr;
2343 return true;
2344 case '&':
2345 res->getLastMatch(out);
2346 return true;
2347 case '+':
2348 res->getLastParen(out);
2349 return true;
2350 case '`':
2351 res->getLeftContext(out);
2352 return true;
2353 case '\'':
2354 res->getRightContext(out);
2355 return true;
2356 }
2357 return false;
2358 }
2360 static bool
2361 FindReplaceLength(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t *sizep)
2362 {
2363 if (rdata.elembase) {
2364 /*
2365 * The base object is used when replace was passed a lambda which looks like
2366 * 'function(a) { return b[a]; }' for the base object b. b will not change
2367 * in the course of the replace unless we end up making a scripted call due
2368 * to accessing a scripted getter or a value with a scripted toString.
2369 */
2370 JS_ASSERT(rdata.lambda);
2371 JS_ASSERT(!rdata.elembase->getOps()->lookupProperty);
2372 JS_ASSERT(!rdata.elembase->getOps()->getProperty);
2374 RootedValue match(cx);
2375 if (!res->createLastMatch(cx, &match))
2376 return false;
2377 JSAtom *atom = ToAtom<CanGC>(cx, match);
2378 if (!atom)
2379 return false;
2381 RootedValue v(cx);
2382 if (HasDataProperty(cx, rdata.elembase, AtomToId(atom), v.address()) && v.isString()) {
2383 rdata.repstr = v.toString()->ensureLinear(cx);
2384 if (!rdata.repstr)
2385 return false;
2386 *sizep = rdata.repstr->length();
2387 return true;
2388 }
2390 /*
2391 * Couldn't handle this property, fall through and despecialize to the
2392 * general lambda case.
2393 */
2394 rdata.elembase = nullptr;
2395 }
2397 if (rdata.lambda) {
2398 RootedObject lambda(cx, rdata.lambda);
2399 PreserveRegExpStatics staticsGuard(cx, res);
2400 if (!staticsGuard.init(cx))
2401 return false;
2403 /*
2404 * In the lambda case, not only do we find the replacement string's
2405 * length, we compute repstr and return it via rdata for use within
2406 * DoReplace. The lambda is called with arguments ($&, $1, $2, ...,
2407 * index, input), i.e., all the properties of a regexp match array.
2408 * For $&, etc., we must create string jsvals from cx->regExpStatics.
2409 * We grab up stack space to keep the newborn strings GC-rooted.
2410 */
2411 unsigned p = res->getMatches().parenCount();
2412 unsigned argc = 1 + p + 2;
2414 InvokeArgs &args = rdata.fig.args();
2415 if (!args.init(argc))
2416 return false;
2418 args.setCallee(ObjectValue(*lambda));
2419 args.setThis(UndefinedValue());
2421 /* Push $&, $1, $2, ... */
2422 unsigned argi = 0;
2423 if (!res->createLastMatch(cx, args[argi++]))
2424 return false;
2426 for (size_t i = 0; i < res->getMatches().parenCount(); ++i) {
2427 if (!res->createParen(cx, i + 1, args[argi++]))
2428 return false;
2429 }
2431 /* Push match index and input string. */
2432 args[argi++].setInt32(res->getMatches()[0].start);
2433 args[argi].setString(rdata.str);
2435 if (!rdata.fig.invoke(cx))
2436 return false;
2438 /* root repstr: rdata is on the stack, so scanned by conservative gc. */
2439 JSString *repstr = ToString<CanGC>(cx, args.rval());
2440 if (!repstr)
2441 return false;
2442 rdata.repstr = repstr->ensureLinear(cx);
2443 if (!rdata.repstr)
2444 return false;
2445 *sizep = rdata.repstr->length();
2446 return true;
2447 }
2449 JSString *repstr = rdata.repstr;
2450 CheckedInt<uint32_t> replen = repstr->length();
2451 for (const jschar *dp = rdata.dollar, *ep = rdata.dollarEnd; dp;
2452 dp = js_strchr_limit(dp, '$', ep)) {
2453 JSSubString sub;
2454 size_t skip;
2455 if (InterpretDollar(res, dp, ep, rdata, &sub, &skip)) {
2456 if (sub.length > skip)
2457 replen += sub.length - skip;
2458 else
2459 replen -= skip - sub.length;
2460 dp += skip;
2461 } else {
2462 dp++;
2463 }
2464 }
2466 if (!replen.isValid()) {
2467 js_ReportAllocationOverflow(cx);
2468 return false;
2469 }
2471 *sizep = replen.value();
2472 return true;
2473 }
2475 /*
2476 * Precondition: |rdata.sb| already has necessary growth space reserved (as
2477 * derived from FindReplaceLength).
2478 */
2479 static void
2480 DoReplace(RegExpStatics *res, ReplaceData &rdata)
2481 {
2482 JSLinearString *repstr = rdata.repstr;
2483 const jschar *cp;
2484 const jschar *bp = cp = repstr->chars();
2486 const jschar *dp = rdata.dollar;
2487 const jschar *ep = rdata.dollarEnd;
2488 for (; dp; dp = js_strchr_limit(dp, '$', ep)) {
2489 /* Move one of the constant portions of the replacement value. */
2490 size_t len = dp - cp;
2491 rdata.sb.infallibleAppend(cp, len);
2492 cp = dp;
2494 JSSubString sub;
2495 size_t skip;
2496 if (InterpretDollar(res, dp, ep, rdata, &sub, &skip)) {
2497 len = sub.length;
2498 rdata.sb.infallibleAppend(sub.chars, len);
2499 cp += skip;
2500 dp += skip;
2501 } else {
2502 dp++;
2503 }
2504 }
2505 rdata.sb.infallibleAppend(cp, repstr->length() - (cp - bp));
2506 }
2508 static bool
2509 ReplaceRegExp(JSContext *cx, RegExpStatics *res, ReplaceData &rdata)
2510 {
2512 const MatchPair &match = res->getMatches()[0];
2513 JS_ASSERT(!match.isUndefined());
2514 JS_ASSERT(match.limit >= match.start && match.limit >= 0);
2516 rdata.calledBack = true;
2517 size_t leftoff = rdata.leftIndex;
2518 size_t leftlen = match.start - leftoff;
2519 rdata.leftIndex = match.limit;
2521 size_t replen = 0; /* silence 'unused' warning */
2522 if (!FindReplaceLength(cx, res, rdata, &replen))
2523 return false;
2525 CheckedInt<uint32_t> newlen(rdata.sb.length());
2526 newlen += leftlen;
2527 newlen += replen;
2528 if (!newlen.isValid()) {
2529 js_ReportAllocationOverflow(cx);
2530 return false;
2531 }
2532 if (!rdata.sb.reserve(newlen.value()))
2533 return false;
2535 JSLinearString &str = rdata.str->asLinear(); /* flattened for regexp */
2536 const jschar *left = str.chars() + leftoff;
2538 rdata.sb.infallibleAppend(left, leftlen); /* skipped-over portion of the search value */
2539 DoReplace(res, rdata);
2540 return true;
2541 }
2543 static bool
2544 BuildFlatReplacement(JSContext *cx, HandleString textstr, HandleString repstr,
2545 const FlatMatch &fm, MutableHandleValue rval)
2546 {
2547 RopeBuilder builder(cx);
2548 size_t match = fm.match();
2549 size_t matchEnd = match + fm.patternLength();
2551 if (textstr->isRope()) {
2552 /*
2553 * If we are replacing over a rope, avoid flattening it by iterating
2554 * through it, building a new rope.
2555 */
2556 StringSegmentRange r(cx);
2557 if (!r.init(textstr))
2558 return false;
2559 size_t pos = 0;
2560 while (!r.empty()) {
2561 RootedString str(cx, r.front());
2562 size_t len = str->length();
2563 size_t strEnd = pos + len;
2564 if (pos < matchEnd && strEnd > match) {
2565 /*
2566 * We need to special-case any part of the rope that overlaps
2567 * with the replacement string.
2568 */
2569 if (match >= pos) {
2570 /*
2571 * If this part of the rope overlaps with the left side of
2572 * the pattern, then it must be the only one to overlap with
2573 * the first character in the pattern, so we include the
2574 * replacement string here.
2575 */
2576 RootedString leftSide(cx, js_NewDependentString(cx, str, 0, match - pos));
2577 if (!leftSide ||
2578 !builder.append(leftSide) ||
2579 !builder.append(repstr)) {
2580 return false;
2581 }
2582 }
2584 /*
2585 * If str runs off the end of the matched string, append the
2586 * last part of str.
2587 */
2588 if (strEnd > matchEnd) {
2589 RootedString rightSide(cx, js_NewDependentString(cx, str, matchEnd - pos,
2590 strEnd - matchEnd));
2591 if (!rightSide || !builder.append(rightSide))
2592 return false;
2593 }
2594 } else {
2595 if (!builder.append(str))
2596 return false;
2597 }
2598 pos += str->length();
2599 if (!r.popFront())
2600 return false;
2601 }
2602 } else {
2603 RootedString leftSide(cx, js_NewDependentString(cx, textstr, 0, match));
2604 if (!leftSide)
2605 return false;
2606 RootedString rightSide(cx);
2607 rightSide = js_NewDependentString(cx, textstr, match + fm.patternLength(),
2608 textstr->length() - match - fm.patternLength());
2609 if (!rightSide ||
2610 !builder.append(leftSide) ||
2611 !builder.append(repstr) ||
2612 !builder.append(rightSide)) {
2613 return false;
2614 }
2615 }
2617 rval.setString(builder.result());
2618 return true;
2619 }
2621 /*
2622 * Perform a linear-scan dollar substitution on the replacement text,
2623 * constructing a result string that looks like:
2624 *
2625 * newstring = string[:matchStart] + dollarSub(replaceValue) + string[matchLimit:]
2626 */
2627 static inline bool
2628 BuildDollarReplacement(JSContext *cx, JSString *textstrArg, JSLinearString *repstr,
2629 const jschar *firstDollar, const FlatMatch &fm, MutableHandleValue rval)
2630 {
2631 Rooted<JSLinearString*> textstr(cx, textstrArg->ensureLinear(cx));
2632 if (!textstr)
2633 return false;
2635 JS_ASSERT(repstr->chars() <= firstDollar && firstDollar < repstr->chars() + repstr->length());
2636 size_t matchStart = fm.match();
2637 size_t matchLimit = matchStart + fm.patternLength();
2639 /*
2640 * Most probably:
2641 *
2642 * len(newstr) >= len(orig) - len(match) + len(replacement)
2643 *
2644 * Note that dollar vars _could_ make the resulting text smaller than this.
2645 */
2646 StringBuffer newReplaceChars(cx);
2647 if (!newReplaceChars.reserve(textstr->length() - fm.patternLength() + repstr->length()))
2648 return false;
2650 /* Move the pre-dollar chunk in bulk. */
2651 newReplaceChars.infallibleAppend(repstr->chars(), firstDollar);
2653 /* Move the rest char-by-char, interpreting dollars as we encounter them. */
2654 #define ENSURE(__cond) if (!(__cond)) return false;
2655 const jschar *repstrLimit = repstr->chars() + repstr->length();
2656 for (const jschar *it = firstDollar; it < repstrLimit; ++it) {
2657 if (*it != '$' || it == repstrLimit - 1) {
2658 ENSURE(newReplaceChars.append(*it));
2659 continue;
2660 }
2662 switch (*(it + 1)) {
2663 case '$': /* Eat one of the dollars. */
2664 ENSURE(newReplaceChars.append(*it));
2665 break;
2666 case '&':
2667 ENSURE(newReplaceChars.append(textstr->chars() + matchStart,
2668 textstr->chars() + matchLimit));
2669 break;
2670 case '`':
2671 ENSURE(newReplaceChars.append(textstr->chars(), textstr->chars() + matchStart));
2672 break;
2673 case '\'':
2674 ENSURE(newReplaceChars.append(textstr->chars() + matchLimit,
2675 textstr->chars() + textstr->length()));
2676 break;
2677 default: /* The dollar we saw was not special (no matter what its mother told it). */
2678 ENSURE(newReplaceChars.append(*it));
2679 continue;
2680 }
2681 ++it; /* We always eat an extra char in the above switch. */
2682 }
2684 RootedString leftSide(cx, js_NewDependentString(cx, textstr, 0, matchStart));
2685 ENSURE(leftSide);
2687 RootedString newReplace(cx, newReplaceChars.finishString());
2688 ENSURE(newReplace);
2690 JS_ASSERT(textstr->length() >= matchLimit);
2691 RootedString rightSide(cx, js_NewDependentString(cx, textstr, matchLimit,
2692 textstr->length() - matchLimit));
2693 ENSURE(rightSide);
2695 RopeBuilder builder(cx);
2696 ENSURE(builder.append(leftSide) &&
2697 builder.append(newReplace) &&
2698 builder.append(rightSide));
2699 #undef ENSURE
2701 rval.setString(builder.result());
2702 return true;
2703 }
/* A (start, length) pair identifying a run of characters inside some string. */
struct StringRange
{
    size_t start;   /* index of the first character of the run */
    size_t length;  /* number of characters in the run */

    StringRange(size_t rangeStart, size_t rangeLength)
    {
        start = rangeStart;
        length = rangeLength;
    }
};
2715 static inline JSFatInlineString *
2716 FlattenSubstrings(JSContext *cx, const jschar *chars,
2717 const StringRange *ranges, size_t rangesLen, size_t outputLen)
2718 {
2719 JS_ASSERT(JSFatInlineString::lengthFits(outputLen));
2721 JSFatInlineString *str = js_NewGCFatInlineString<CanGC>(cx);
2722 if (!str)
2723 return nullptr;
2724 jschar *buf = str->init(outputLen);
2726 size_t pos = 0;
2727 for (size_t i = 0; i < rangesLen; i++) {
2728 PodCopy(buf + pos, chars + ranges[i].start, ranges[i].length);
2729 pos += ranges[i].length;
2730 }
2731 JS_ASSERT(pos == outputLen);
2733 buf[outputLen] = 0;
2734 return str;
2735 }
2737 static JSString *
2738 AppendSubstrings(JSContext *cx, Handle<JSFlatString*> flatStr,
2739 const StringRange *ranges, size_t rangesLen)
2740 {
2741 JS_ASSERT(rangesLen);
2743 /* For single substrings, construct a dependent string. */
2744 if (rangesLen == 1)
2745 return js_NewDependentString(cx, flatStr, ranges[0].start, ranges[0].length);
2747 const jschar *chars = flatStr->getChars(cx);
2748 if (!chars)
2749 return nullptr;
2751 /* Collect substrings into a rope */
2752 size_t i = 0;
2753 RopeBuilder rope(cx);
2754 RootedString part(cx, nullptr);
2755 while (i < rangesLen) {
2757 /* Find maximum range that fits in JSFatInlineString */
2758 size_t substrLen = 0;
2759 size_t end = i;
2760 for (; end < rangesLen; end++) {
2761 if (substrLen + ranges[end].length > JSFatInlineString::MAX_FAT_INLINE_LENGTH)
2762 break;
2763 substrLen += ranges[end].length;
2764 }
2766 if (i == end) {
2767 /* Not even one range fits JSFatInlineString, use DependentString */
2768 const StringRange &sr = ranges[i++];
2769 part = js_NewDependentString(cx, flatStr, sr.start, sr.length);
2770 } else {
2771 /* Copy the ranges (linearly) into a JSFatInlineString */
2772 part = FlattenSubstrings(cx, chars, ranges + i, end - i, substrLen);
2773 i = end;
2774 }
2776 if (!part)
2777 return nullptr;
2779 /* Appending to the rope permanently roots the substring. */
2780 if (!rope.append(part))
2781 return nullptr;
2782 }
2784 return rope.result();
2785 }
2787 static bool
2788 StrReplaceRegexpRemove(JSContext *cx, HandleString str, RegExpShared &re, MutableHandleValue rval)
2789 {
2790 Rooted<JSFlatString*> flatStr(cx, str->ensureFlat(cx));
2791 if (!flatStr)
2792 return false;
2794 Vector<StringRange, 16, SystemAllocPolicy> ranges;
2796 size_t charsLen = flatStr->length();
2798 MatchPair match;
2799 size_t startIndex = 0; /* Index used for iterating through the string. */
2800 size_t lastIndex = 0; /* Index after last successful match. */
2801 size_t lazyIndex = 0; /* Index before last successful match. */
2803 /* Accumulate StringRanges for unmatched substrings. */
2804 while (startIndex <= charsLen) {
2805 if (!CheckForInterrupt(cx))
2806 return false;
2808 RegExpRunStatus status =
2809 re.executeMatchOnly(cx, flatStr->chars(), charsLen, &startIndex, match);
2810 if (status == RegExpRunStatus_Error)
2811 return false;
2812 if (status == RegExpRunStatus_Success_NotFound)
2813 break;
2815 /* Include the latest unmatched substring. */
2816 if (size_t(match.start) > lastIndex) {
2817 if (!ranges.append(StringRange(lastIndex, match.start - lastIndex)))
2818 return false;
2819 }
2821 lazyIndex = lastIndex;
2822 lastIndex = startIndex;
2824 if (match.isEmpty())
2825 startIndex++;
2827 /* Non-global removal executes at most once. */
2828 if (!re.global())
2829 break;
2830 }
2832 /* If unmatched, return the input string. */
2833 if (!lastIndex) {
2834 if (startIndex > 0)
2835 cx->global()->getRegExpStatics()->updateLazily(cx, flatStr, &re, lazyIndex);
2836 rval.setString(str);
2837 return true;
2838 }
2840 /* The last successful match updates the RegExpStatics. */
2841 cx->global()->getRegExpStatics()->updateLazily(cx, flatStr, &re, lazyIndex);
2843 /* Include any remaining part of the string. */
2844 if (lastIndex < charsLen) {
2845 if (!ranges.append(StringRange(lastIndex, charsLen - lastIndex)))
2846 return false;
2847 }
2849 /* Handle the empty string before calling .begin(). */
2850 if (ranges.empty()) {
2851 rval.setString(cx->runtime()->emptyString);
2852 return true;
2853 }
2855 JSString *result = AppendSubstrings(cx, flatStr, ranges.begin(), ranges.length());
2856 if (!result)
2857 return false;
2859 rval.setString(result);
2860 return true;
2861 }
2863 static inline bool
2864 StrReplaceRegExp(JSContext *cx, ReplaceData &rdata, MutableHandleValue rval)
2865 {
2866 rdata.leftIndex = 0;
2867 rdata.calledBack = false;
2869 RegExpStatics *res = cx->global()->getRegExpStatics();
2870 RegExpShared &re = rdata.g.regExp();
2872 // The spec doesn't describe this function very clearly, so we go ahead and
2873 // assume that when the input to String.prototype.replace is a global
2874 // RegExp, calling the replacer function (assuming one was provided) takes
2875 // place only after the matching is done. See the comment at the beginning
2876 // of DoMatchGlobal explaining why we can zero the the RegExp object's
2877 // lastIndex property here.
2878 if (re.global() && !rdata.g.zeroLastIndex(cx))
2879 return false;
2881 /* Optimize removal. */
2882 if (rdata.repstr && rdata.repstr->length() == 0) {
2883 JS_ASSERT(!rdata.lambda && !rdata.elembase && !rdata.dollar);
2884 return StrReplaceRegexpRemove(cx, rdata.str, re, rval);
2885 }
2887 Rooted<JSLinearString*> linearStr(cx, rdata.str->ensureLinear(cx));
2888 if (!linearStr)
2889 return false;
2891 if (re.global()) {
2892 if (!DoMatchForReplaceGlobal(cx, res, linearStr, re, rdata))
2893 return false;
2894 } else {
2895 if (!DoMatchForReplaceLocal(cx, res, linearStr, re, rdata))
2896 return false;
2897 }
2899 if (!rdata.calledBack) {
2900 /* Didn't match, so the string is unmodified. */
2901 rval.setString(rdata.str);
2902 return true;
2903 }
2905 JSSubString sub;
2906 res->getRightContext(&sub);
2907 if (!rdata.sb.append(sub.chars, sub.length))
2908 return false;
2910 JSString *retstr = rdata.sb.finishString();
2911 if (!retstr)
2912 return false;
2914 rval.setString(retstr);
2915 return true;
2916 }
2918 static inline bool
2919 str_replace_regexp(JSContext *cx, CallArgs args, ReplaceData &rdata)
2920 {
2921 if (!rdata.g.normalizeRegExp(cx, true, 2, args))
2922 return false;
2924 return StrReplaceRegExp(cx, rdata, args.rval());
2925 }
2927 bool
2928 js::str_replace_regexp_raw(JSContext *cx, HandleString string, HandleObject regexp,
2929 HandleString replacement, MutableHandleValue rval)
2930 {
2931 /* Optimize removal, so we don't have to create ReplaceData */
2932 if (replacement->length() == 0) {
2933 StringRegExpGuard guard(cx);
2934 if (!guard.init(cx, regexp))
2935 return false;
2937 RegExpShared &re = guard.regExp();
2938 return StrReplaceRegexpRemove(cx, string, re, rval);
2939 }
2941 ReplaceData rdata(cx);
2942 rdata.str = string;
2944 JSLinearString *repl = replacement->ensureLinear(cx);
2945 if (!repl)
2946 return false;
2948 rdata.setReplacementString(repl);
2950 if (!rdata.g.init(cx, regexp))
2951 return false;
2953 return StrReplaceRegExp(cx, rdata, rval);
2954 }
2956 static inline bool
2957 StrReplaceString(JSContext *cx, ReplaceData &rdata, const FlatMatch &fm, MutableHandleValue rval)
2958 {
2959 /*
2960 * Note: we could optimize the text.length == pattern.length case if we wanted,
2961 * even in the presence of dollar metachars.
2962 */
2963 if (rdata.dollar)
2964 return BuildDollarReplacement(cx, rdata.str, rdata.repstr, rdata.dollar, fm, rval);
2965 return BuildFlatReplacement(cx, rdata.str, rdata.repstr, fm, rval);
2966 }
2968 static const uint32_t ReplaceOptArg = 2;
2970 bool
2971 js::str_replace_string_raw(JSContext *cx, HandleString string, HandleString pattern,
2972 HandleString replacement, MutableHandleValue rval)
2973 {
2974 ReplaceData rdata(cx);
2976 rdata.str = string;
2977 JSLinearString *repl = replacement->ensureLinear(cx);
2978 if (!repl)
2979 return false;
2980 rdata.setReplacementString(repl);
2982 if (!rdata.g.init(cx, pattern))
2983 return false;
2984 const FlatMatch *fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, ReplaceOptArg, false);
2986 if (fm->match() < 0) {
2987 rval.setString(string);
2988 return true;
2989 }
2991 return StrReplaceString(cx, rdata, *fm, rval);
2992 }
2994 static inline bool
2995 str_replace_flat_lambda(JSContext *cx, CallArgs outerArgs, ReplaceData &rdata, const FlatMatch &fm)
2996 {
2997 RootedString matchStr(cx, js_NewDependentString(cx, rdata.str, fm.match(), fm.patternLength()));
2998 if (!matchStr)
2999 return false;
3001 /* lambda(matchStr, matchStart, textstr) */
3002 static const uint32_t lambdaArgc = 3;
3003 if (!rdata.fig.args().init(lambdaArgc))
3004 return false;
3006 CallArgs &args = rdata.fig.args();
3007 args.setCallee(ObjectValue(*rdata.lambda));
3008 args.setThis(UndefinedValue());
3010 Value *sp = args.array();
3011 sp[0].setString(matchStr);
3012 sp[1].setInt32(fm.match());
3013 sp[2].setString(rdata.str);
3015 if (!rdata.fig.invoke(cx))
3016 return false;
3018 RootedString repstr(cx, ToString<CanGC>(cx, args.rval()));
3019 if (!repstr)
3020 return false;
3022 RootedString leftSide(cx, js_NewDependentString(cx, rdata.str, 0, fm.match()));
3023 if (!leftSide)
3024 return false;
3026 size_t matchLimit = fm.match() + fm.patternLength();
3027 RootedString rightSide(cx, js_NewDependentString(cx, rdata.str, matchLimit,
3028 rdata.str->length() - matchLimit));
3029 if (!rightSide)
3030 return false;
3032 RopeBuilder builder(cx);
3033 if (!(builder.append(leftSide) &&
3034 builder.append(repstr) &&
3035 builder.append(rightSide))) {
3036 return false;
3037 }
3039 outerArgs.rval().setString(builder.result());
3040 return true;
3041 }
3043 /*
3044 * Pattern match the script to check if it is is indexing into a particular
3045 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
3046 * such cases, which are used by javascript packers (particularly the popular
3047 * Dean Edwards packer) to efficiently encode large scripts. We only handle the
3048 * code patterns generated by such packers here.
3049 */
3050 static bool
3051 LambdaIsGetElem(JSContext *cx, JSObject &lambda, MutableHandleObject pobj)
3052 {
3053 if (!lambda.is<JSFunction>())
3054 return true;
3056 RootedFunction fun(cx, &lambda.as<JSFunction>());
3057 if (!fun->isInterpreted())
3058 return true;
3060 JSScript *script = fun->getOrCreateScript(cx);
3061 if (!script)
3062 return false;
3064 jsbytecode *pc = script->code();
3066 /*
3067 * JSOP_GETALIASEDVAR tells us exactly where to find the base object 'b'.
3068 * Rule out the (unlikely) possibility of a heavyweight function since it
3069 * would make our scope walk off by 1.
3070 */
3071 if (JSOp(*pc) != JSOP_GETALIASEDVAR || fun->isHeavyweight())
3072 return true;
3073 ScopeCoordinate sc(pc);
3074 ScopeObject *scope = &fun->environment()->as<ScopeObject>();
3075 for (unsigned i = 0; i < sc.hops(); ++i)
3076 scope = &scope->enclosingScope().as<ScopeObject>();
3077 Value b = scope->aliasedVar(sc);
3078 pc += JSOP_GETALIASEDVAR_LENGTH;
3080 /* Look for 'a' to be the lambda's first argument. */
3081 if (JSOp(*pc) != JSOP_GETARG || GET_ARGNO(pc) != 0)
3082 return true;
3083 pc += JSOP_GETARG_LENGTH;
3085 /* 'b[a]' */
3086 if (JSOp(*pc) != JSOP_GETELEM)
3087 return true;
3088 pc += JSOP_GETELEM_LENGTH;
3090 /* 'return b[a]' */
3091 if (JSOp(*pc) != JSOP_RETURN)
3092 return true;
3094 /* 'b' must behave like a normal object. */
3095 if (!b.isObject())
3096 return true;
3098 JSObject &bobj = b.toObject();
3099 const Class *clasp = bobj.getClass();
3100 if (!clasp->isNative() || clasp->ops.lookupProperty || clasp->ops.getProperty)
3101 return true;
3103 pobj.set(&bobj);
3104 return true;
3105 }
3107 bool
3108 js::str_replace(JSContext *cx, unsigned argc, Value *vp)
3109 {
3110 CallArgs args = CallArgsFromVp(argc, vp);
3112 ReplaceData rdata(cx);
3113 rdata.str = ThisToStringForStringProto(cx, args);
3114 if (!rdata.str)
3115 return false;
3117 if (!rdata.g.init(cx, args))
3118 return false;
3120 /* Extract replacement string/function. */
3121 if (args.length() >= ReplaceOptArg && js_IsCallable(args[1])) {
3122 rdata.setReplacementFunction(&args[1].toObject());
3124 if (!LambdaIsGetElem(cx, *rdata.lambda, &rdata.elembase))
3125 return false;
3126 } else {
3127 JSLinearString *string = ArgToRootedString(cx, args, 1);
3128 if (!string)
3129 return false;
3131 rdata.setReplacementString(string);
3132 }
3134 rdata.fig.initFunction(ObjectOrNullValue(rdata.lambda));
3136 /*
3137 * Unlike its |String.prototype| brethren, |replace| doesn't convert
3138 * its input to a regular expression. (Even if it contains metachars.)
3139 *
3140 * However, if the user invokes our (non-standard) |flags| argument
3141 * extension then we revert to creating a regular expression. Note that
3142 * this is observable behavior through the side-effect mutation of the
3143 * |RegExp| statics.
3144 */
3146 const FlatMatch *fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, args.length(), false);
3148 if (!fm) {
3149 if (cx->isExceptionPending()) /* oom in RopeMatch in tryFlatMatch */
3150 return false;
3151 return str_replace_regexp(cx, args, rdata);
3152 }
3154 if (fm->match() < 0) {
3155 args.rval().setString(rdata.str);
3156 return true;
3157 }
3159 if (rdata.lambda)
3160 return str_replace_flat_lambda(cx, args, rdata, *fm);
3161 return StrReplaceString(cx, rdata, *fm, args.rval());
3162 }
3164 namespace {
3166 class SplitMatchResult {
3167 size_t endIndex_;
3168 size_t length_;
3170 public:
3171 void setFailure() {
3172 JS_STATIC_ASSERT(SIZE_MAX > JSString::MAX_LENGTH);
3173 endIndex_ = SIZE_MAX;
3174 }
3175 bool isFailure() const {
3176 return endIndex_ == SIZE_MAX;
3177 }
3178 size_t endIndex() const {
3179 JS_ASSERT(!isFailure());
3180 return endIndex_;
3181 }
3182 size_t length() const {
3183 JS_ASSERT(!isFailure());
3184 return length_;
3185 }
3186 void setResult(size_t length, size_t endIndex) {
3187 length_ = length;
3188 endIndex_ = endIndex;
3189 }
3190 };
3192 } /* anonymous namespace */
3194 template<class Matcher>
3195 static ArrayObject *
3196 SplitHelper(JSContext *cx, Handle<JSLinearString*> str, uint32_t limit, const Matcher &splitMatch,
3197 Handle<TypeObject*> type)
3198 {
3199 size_t strLength = str->length();
3200 SplitMatchResult result;
3202 /* Step 11. */
3203 if (strLength == 0) {
3204 if (!splitMatch(cx, str, 0, &result))
3205 return nullptr;
3207 /*
3208 * NB: Unlike in the non-empty string case, it's perfectly fine
3209 * (indeed the spec requires it) if we match at the end of the
3210 * string. Thus these cases should hold:
3211 *
3212 * var a = "".split("");
3213 * assertEq(a.length, 0);
3214 * var b = "".split(/.?/);
3215 * assertEq(b.length, 0);
3216 */
3217 if (!result.isFailure())
3218 return NewDenseEmptyArray(cx);
3220 RootedValue v(cx, StringValue(str));
3221 return NewDenseCopiedArray(cx, 1, v.address());
3222 }
3224 /* Step 12. */
3225 size_t lastEndIndex = 0;
3226 size_t index = 0;
3228 /* Step 13. */
3229 AutoValueVector splits(cx);
3231 while (index < strLength) {
3232 /* Step 13(a). */
3233 if (!splitMatch(cx, str, index, &result))
3234 return nullptr;
3236 /*
3237 * Step 13(b).
3238 *
3239 * Our match algorithm differs from the spec in that it returns the
3240 * next index at which a match happens. If no match happens we're
3241 * done.
3242 *
3243 * But what if the match is at the end of the string (and the string is
3244 * not empty)? Per 13(c)(ii) this shouldn't be a match, so we have to
3245 * specially exclude it. Thus this case should hold:
3246 *
3247 * var a = "abc".split(/\b/);
3248 * assertEq(a.length, 1);
3249 * assertEq(a[0], "abc");
3250 */
3251 if (result.isFailure())
3252 break;
3254 /* Step 13(c)(i). */
3255 size_t sepLength = result.length();
3256 size_t endIndex = result.endIndex();
3257 if (sepLength == 0 && endIndex == strLength)
3258 break;
3260 /* Step 13(c)(ii). */
3261 if (endIndex == lastEndIndex) {
3262 index++;
3263 continue;
3264 }
3266 /* Step 13(c)(iii). */
3267 JS_ASSERT(lastEndIndex < endIndex);
3268 JS_ASSERT(sepLength <= strLength);
3269 JS_ASSERT(lastEndIndex + sepLength <= endIndex);
3271 /* Steps 13(c)(iii)(1-3). */
3272 size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
3273 JSString *sub = js_NewDependentString(cx, str, lastEndIndex, subLength);
3274 if (!sub || !splits.append(StringValue(sub)))
3275 return nullptr;
3277 /* Step 13(c)(iii)(4). */
3278 if (splits.length() == limit)
3279 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3281 /* Step 13(c)(iii)(5). */
3282 lastEndIndex = endIndex;
3284 /* Step 13(c)(iii)(6-7). */
3285 if (Matcher::returnsCaptures) {
3286 RegExpStatics *res = cx->global()->getRegExpStatics();
3287 const MatchPairs &matches = res->getMatches();
3288 for (size_t i = 0; i < matches.parenCount(); i++) {
3289 /* Steps 13(c)(iii)(7)(a-c). */
3290 if (!matches[i + 1].isUndefined()) {
3291 JSSubString parsub;
3292 res->getParen(i + 1, &parsub);
3293 sub = js_NewStringCopyN<CanGC>(cx, parsub.chars, parsub.length);
3294 if (!sub || !splits.append(StringValue(sub)))
3295 return nullptr;
3296 } else {
3297 /* Only string entries have been accounted for so far. */
3298 AddTypePropertyId(cx, type, JSID_VOID, UndefinedValue());
3299 if (!splits.append(UndefinedValue()))
3300 return nullptr;
3301 }
3303 /* Step 13(c)(iii)(7)(d). */
3304 if (splits.length() == limit)
3305 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3306 }
3307 }
3309 /* Step 13(c)(iii)(8). */
3310 index = lastEndIndex;
3311 }
3313 /* Steps 14-15. */
3314 JSString *sub = js_NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
3315 if (!sub || !splits.append(StringValue(sub)))
3316 return nullptr;
3318 /* Step 16. */
3319 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3320 }
3322 // Fast-path for splitting a string into a character array via split("").
3323 static ArrayObject *
3324 CharSplitHelper(JSContext *cx, Handle<JSLinearString*> str, uint32_t limit)
3325 {
3326 size_t strLength = str->length();
3327 if (strLength == 0)
3328 return NewDenseEmptyArray(cx);
3330 js::StaticStrings &staticStrings = cx->staticStrings();
3331 uint32_t resultlen = (limit < strLength ? limit : strLength);
3333 AutoValueVector splits(cx);
3334 if (!splits.reserve(resultlen))
3335 return nullptr;
3337 for (size_t i = 0; i < resultlen; ++i) {
3338 JSString *sub = staticStrings.getUnitStringForElement(cx, str, i);
3339 if (!sub)
3340 return nullptr;
3341 splits.infallibleAppend(StringValue(sub));
3342 }
3344 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3345 }
3347 namespace {
3349 /*
3350 * The SplitMatch operation from ES5 15.5.4.14 is implemented using different
3351 * paths for regular expression and string separators.
3352 *
3353 * The algorithm differs from the spec in that the we return the next index at
3354 * which a match happens.
3355 */
3356 class SplitRegExpMatcher
3357 {
3358 RegExpShared &re;
3359 RegExpStatics *res;
3361 public:
3362 SplitRegExpMatcher(RegExpShared &re, RegExpStatics *res) : re(re), res(res) {}
3364 static const bool returnsCaptures = true;
3366 bool operator()(JSContext *cx, Handle<JSLinearString*> str, size_t index,
3367 SplitMatchResult *result) const
3368 {
3369 const jschar *chars = str->chars();
3370 size_t length = str->length();
3372 ScopedMatchPairs matches(&cx->tempLifoAlloc());
3373 RegExpRunStatus status = re.execute(cx, chars, length, &index, matches);
3374 if (status == RegExpRunStatus_Error)
3375 return false;
3377 if (status == RegExpRunStatus_Success_NotFound) {
3378 result->setFailure();
3379 return true;
3380 }
3382 if (!res->updateFromMatchPairs(cx, str, matches))
3383 return false;
3385 JSSubString sep;
3386 res->getLastMatch(&sep);
3388 result->setResult(sep.length, index);
3389 return true;
3390 }
3391 };
3393 class SplitStringMatcher
3394 {
3395 Rooted<JSLinearString*> sep;
3397 public:
3398 SplitStringMatcher(JSContext *cx, HandleLinearString sep)
3399 : sep(cx, sep)
3400 {}
3402 static const bool returnsCaptures = false;
3404 bool operator()(JSContext *cx, JSLinearString *str, size_t index, SplitMatchResult *res) const
3405 {
3406 JS_ASSERT(index == 0 || index < str->length());
3407 const jschar *chars = str->chars();
3408 int match = StringMatch(chars + index, str->length() - index,
3409 sep->chars(), sep->length());
3410 if (match == -1)
3411 res->setFailure();
3412 else
3413 res->setResult(sep->length(), index + match + sep->length());
3414 return true;
3415 }
3416 };
3418 } /* anonymous namespace */
3420 /* ES5 15.5.4.14 */
3421 bool
3422 js::str_split(JSContext *cx, unsigned argc, Value *vp)
3423 {
3424 CallArgs args = CallArgsFromVp(argc, vp);
3426 /* Steps 1-2. */
3427 RootedString str(cx, ThisToStringForStringProto(cx, args));
3428 if (!str)
3429 return false;
3431 RootedTypeObject type(cx, GetTypeCallerInitObject(cx, JSProto_Array));
3432 if (!type)
3433 return false;
3434 AddTypePropertyId(cx, type, JSID_VOID, Type::StringType());
3436 /* Step 5: Use the second argument as the split limit, if given. */
3437 uint32_t limit;
3438 if (args.hasDefined(1)) {
3439 double d;
3440 if (!ToNumber(cx, args[1], &d))
3441 return false;
3442 limit = ToUint32(d);
3443 } else {
3444 limit = UINT32_MAX;
3445 }
3447 /* Step 8. */
3448 RegExpGuard re(cx);
3449 RootedLinearString sepstr(cx);
3450 bool sepDefined = args.hasDefined(0);
3451 if (sepDefined) {
3452 if (IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
3453 RootedObject obj(cx, &args[0].toObject());
3454 if (!RegExpToShared(cx, obj, &re))
3455 return false;
3456 } else {
3457 sepstr = ArgToRootedString(cx, args, 0);
3458 if (!sepstr)
3459 return false;
3460 }
3461 }
3463 /* Step 9. */
3464 if (limit == 0) {
3465 JSObject *aobj = NewDenseEmptyArray(cx);
3466 if (!aobj)
3467 return false;
3468 aobj->setType(type);
3469 args.rval().setObject(*aobj);
3470 return true;
3471 }
3473 /* Step 10. */
3474 if (!sepDefined) {
3475 RootedValue v(cx, StringValue(str));
3476 JSObject *aobj = NewDenseCopiedArray(cx, 1, v.address());
3477 if (!aobj)
3478 return false;
3479 aobj->setType(type);
3480 args.rval().setObject(*aobj);
3481 return true;
3482 }
3483 Rooted<JSLinearString*> linearStr(cx, str->ensureLinear(cx));
3484 if (!linearStr)
3485 return false;
3487 /* Steps 11-15. */
3488 RootedObject aobj(cx);
3489 if (!re.initialized()) {
3490 if (sepstr->length() == 0) {
3491 aobj = CharSplitHelper(cx, linearStr, limit);
3492 } else {
3493 SplitStringMatcher matcher(cx, sepstr);
3494 aobj = SplitHelper(cx, linearStr, limit, matcher, type);
3495 }
3496 } else {
3497 SplitRegExpMatcher matcher(*re, cx->global()->getRegExpStatics());
3498 aobj = SplitHelper(cx, linearStr, limit, matcher, type);
3499 }
3500 if (!aobj)
3501 return false;
3503 /* Step 16. */
3504 aobj->setType(type);
3505 args.rval().setObject(*aobj);
3506 return true;
3507 }
3509 JSObject *
3510 js::str_split_string(JSContext *cx, HandleTypeObject type, HandleString str, HandleString sep)
3511 {
3512 Rooted<JSLinearString*> linearStr(cx, str->ensureLinear(cx));
3513 if (!linearStr)
3514 return nullptr;
3516 Rooted<JSLinearString*> linearSep(cx, sep->ensureLinear(cx));
3517 if (!linearSep)
3518 return nullptr;
3520 uint32_t limit = UINT32_MAX;
3522 RootedObject aobj(cx);
3523 if (linearSep->length() == 0) {
3524 aobj = CharSplitHelper(cx, linearStr, limit);
3525 } else {
3526 SplitStringMatcher matcher(cx, linearSep);
3527 aobj = SplitHelper(cx, linearStr, limit, matcher, type);
3528 }
3530 if (!aobj)
3531 return nullptr;
3533 aobj->setType(type);
3534 return aobj;
3535 }
3537 static bool
3538 str_substr(JSContext *cx, unsigned argc, Value *vp)
3539 {
3540 CallArgs args = CallArgsFromVp(argc, vp);
3541 RootedString str(cx, ThisToStringForStringProto(cx, args));
3542 if (!str)
3543 return false;
3545 int32_t length, len, begin;
3546 if (args.length() > 0) {
3547 length = int32_t(str->length());
3548 if (!ValueToIntegerRange(cx, args[0], &begin))
3549 return false;
3551 if (begin >= length) {
3552 args.rval().setString(cx->runtime()->emptyString);
3553 return true;
3554 }
3555 if (begin < 0) {
3556 begin += length; /* length + INT_MIN will always be less than 0 */
3557 if (begin < 0)
3558 begin = 0;
3559 }
3561 if (args.hasDefined(1)) {
3562 if (!ValueToIntegerRange(cx, args[1], &len))
3563 return false;
3565 if (len <= 0) {
3566 args.rval().setString(cx->runtime()->emptyString);
3567 return true;
3568 }
3570 if (uint32_t(length) < uint32_t(begin + len))
3571 len = length - begin;
3572 } else {
3573 len = length - begin;
3574 }
3576 str = DoSubstr(cx, str, size_t(begin), size_t(len));
3577 if (!str)
3578 return false;
3579 }
3581 args.rval().setString(str);
3582 return true;
3583 }
3585 /*
3586 * Python-esque sequence operations.
3587 */
3588 static bool
3589 str_concat(JSContext *cx, unsigned argc, Value *vp)
3590 {
3591 CallArgs args = CallArgsFromVp(argc, vp);
3592 JSString *str = ThisToStringForStringProto(cx, args);
3593 if (!str)
3594 return false;
3596 for (unsigned i = 0; i < args.length(); i++) {
3597 JSString *argStr = ToString<NoGC>(cx, args[i]);
3598 if (!argStr) {
3599 RootedString strRoot(cx, str);
3600 argStr = ToString<CanGC>(cx, args[i]);
3601 if (!argStr)
3602 return false;
3603 str = strRoot;
3604 }
3606 JSString *next = ConcatStrings<NoGC>(cx, str, argStr);
3607 if (next) {
3608 str = next;
3609 } else {
3610 RootedString strRoot(cx, str), argStrRoot(cx, argStr);
3611 str = ConcatStrings<CanGC>(cx, strRoot, argStrRoot);
3612 if (!str)
3613 return false;
3614 }
3615 }
3617 args.rval().setString(str);
3618 return true;
3619 }
3621 static bool
3622 str_slice(JSContext *cx, unsigned argc, Value *vp)
3623 {
3624 CallArgs args = CallArgsFromVp(argc, vp);
3626 if (args.length() == 1 && args.thisv().isString() && args[0].isInt32()) {
3627 JSString *str = args.thisv().toString();
3628 size_t begin = args[0].toInt32();
3629 size_t end = str->length();
3630 if (begin <= end) {
3631 size_t length = end - begin;
3632 if (length == 0) {
3633 str = cx->runtime()->emptyString;
3634 } else {
3635 str = (length == 1)
3636 ? cx->staticStrings().getUnitStringForElement(cx, str, begin)
3637 : js_NewDependentString(cx, str, begin, length);
3638 if (!str)
3639 return false;
3640 }
3641 args.rval().setString(str);
3642 return true;
3643 }
3644 }
3646 RootedString str(cx, ThisToStringForStringProto(cx, args));
3647 if (!str)
3648 return false;
3650 if (args.length() != 0) {
3651 double begin, end, length;
3653 if (!ToInteger(cx, args[0], &begin))
3654 return false;
3655 length = str->length();
3656 if (begin < 0) {
3657 begin += length;
3658 if (begin < 0)
3659 begin = 0;
3660 } else if (begin > length) {
3661 begin = length;
3662 }
3664 if (args.hasDefined(1)) {
3665 if (!ToInteger(cx, args[1], &end))
3666 return false;
3667 if (end < 0) {
3668 end += length;
3669 if (end < 0)
3670 end = 0;
3671 } else if (end > length) {
3672 end = length;
3673 }
3674 if (end < begin)
3675 end = begin;
3676 } else {
3677 end = length;
3678 }
3680 str = js_NewDependentString(cx, str,
3681 (size_t)begin,
3682 (size_t)(end - begin));
3683 if (!str)
3684 return false;
3685 }
3686 args.rval().setString(str);
3687 return true;
3688 }
3690 #if JS_HAS_STR_HTML_HELPERS
3691 /*
3692 * HTML composition aids.
3693 */
3694 static bool
3695 tagify(JSContext *cx, const char *begin, HandleLinearString param, const char *end,
3696 CallReceiver call)
3697 {
3698 JSString *thisstr = ThisToStringForStringProto(cx, call);
3699 if (!thisstr)
3700 return false;
3702 JSLinearString *str = thisstr->ensureLinear(cx);
3703 if (!str)
3704 return false;
3706 if (!end)
3707 end = begin;
3709 size_t beglen = strlen(begin);
3710 size_t taglen = 1 + beglen + 1; /* '<begin' + '>' */
3711 if (param) {
3712 size_t numChars = param->length();
3713 const jschar *parchars = param->chars();
3714 for (size_t i = 0, parlen = numChars; i < parlen; ++i) {
3715 if (parchars[i] == '"')
3716 numChars += 5; /* len(") - len(") */
3717 }
3718 taglen += 2 + numChars + 1; /* '="param"' */
3719 }
3720 size_t endlen = strlen(end);
3721 taglen += str->length() + 2 + endlen + 1; /* 'str</end>' */
3724 StringBuffer sb(cx);
3725 if (!sb.reserve(taglen))
3726 return false;
3728 sb.infallibleAppend('<');
3730 MOZ_ALWAYS_TRUE(sb.appendInflated(begin, beglen));
3732 if (param) {
3733 sb.infallibleAppend('=');
3734 sb.infallibleAppend('"');
3735 const jschar *parchars = param->chars();
3736 for (size_t i = 0, parlen = param->length(); i < parlen; ++i) {
3737 if (parchars[i] != '"') {
3738 sb.infallibleAppend(parchars[i]);
3739 } else {
3740 MOZ_ALWAYS_TRUE(sb.append("""));
3741 }
3742 }
3743 sb.infallibleAppend('"');
3744 }
3746 sb.infallibleAppend('>');
3748 MOZ_ALWAYS_TRUE(sb.append(str));
3750 sb.infallibleAppend('<');
3751 sb.infallibleAppend('/');
3753 MOZ_ALWAYS_TRUE(sb.appendInflated(end, endlen));
3755 sb.infallibleAppend('>');
3757 JSFlatString *retstr = sb.finishString();
3758 if (!retstr)
3759 return false;
3761 call.rval().setString(retstr);
3762 return true;
3763 }
3765 static bool
3766 tagify_value(JSContext *cx, CallArgs args, const char *begin, const char *end)
3767 {
3768 RootedLinearString param(cx, ArgToRootedString(cx, args, 0));
3769 if (!param)
3770 return false;
3772 return tagify(cx, begin, param, end, args);
3773 }
3775 static bool
3776 str_bold(JSContext *cx, unsigned argc, Value *vp)
3777 {
3778 return tagify(cx, "b", NullPtr(), nullptr, CallReceiverFromVp(vp));
3779 }
3781 static bool
3782 str_italics(JSContext *cx, unsigned argc, Value *vp)
3783 {
3784 return tagify(cx, "i", NullPtr(), nullptr, CallReceiverFromVp(vp));
3785 }
3787 static bool
3788 str_fixed(JSContext *cx, unsigned argc, Value *vp)
3789 {
3790 return tagify(cx, "tt", NullPtr(), nullptr, CallReceiverFromVp(vp));
3791 }
3793 static bool
3794 str_fontsize(JSContext *cx, unsigned argc, Value *vp)
3795 {
3796 return tagify_value(cx, CallArgsFromVp(argc, vp), "font size", "font");
3797 }
3799 static bool
3800 str_fontcolor(JSContext *cx, unsigned argc, Value *vp)
3801 {
3802 return tagify_value(cx, CallArgsFromVp(argc, vp), "font color", "font");
3803 }
3805 static bool
3806 str_link(JSContext *cx, unsigned argc, Value *vp)
3807 {
3808 return tagify_value(cx, CallArgsFromVp(argc, vp), "a href", "a");
3809 }
3811 static bool
3812 str_anchor(JSContext *cx, unsigned argc, Value *vp)
3813 {
3814 return tagify_value(cx, CallArgsFromVp(argc, vp), "a name", "a");
3815 }
3817 static bool
3818 str_strike(JSContext *cx, unsigned argc, Value *vp)
3819 {
3820 return tagify(cx, "strike", NullPtr(), nullptr, CallReceiverFromVp(vp));
3821 }
3823 static bool
3824 str_small(JSContext *cx, unsigned argc, Value *vp)
3825 {
3826 return tagify(cx, "small", NullPtr(), nullptr, CallReceiverFromVp(vp));
3827 }
3829 static bool
3830 str_big(JSContext *cx, unsigned argc, Value *vp)
3831 {
3832 return tagify(cx, "big", NullPtr(), nullptr, CallReceiverFromVp(vp));
3833 }
3835 static bool
3836 str_blink(JSContext *cx, unsigned argc, Value *vp)
3837 {
3838 return tagify(cx, "blink", NullPtr(), nullptr, CallReceiverFromVp(vp));
3839 }
3841 static bool
3842 str_sup(JSContext *cx, unsigned argc, Value *vp)
3843 {
3844 return tagify(cx, "sup", NullPtr(), nullptr, CallReceiverFromVp(vp));
3845 }
3847 static bool
3848 str_sub(JSContext *cx, unsigned argc, Value *vp)
3849 {
3850 return tagify(cx, "sub", NullPtr(), nullptr, CallReceiverFromVp(vp));
3851 }
3852 #endif /* JS_HAS_STR_HTML_HELPERS */
/*
 * Function table for the String prototype: js_InitStringClass passes it to
 * DefinePropertiesAndBrand together with the prototype object.
 *
 * Each JS_FN row gives the property name, the native that implements it, and
 * two trailing values. NOTE(review): the trailing values look like the
 * declared argument count and the function flags -- confirm against the
 * JS_FN definition. JS_SELF_HOSTED_FN rows name their implementation by
 * string instead of by native pointer. The table ends with JS_FS_END.
 */
3854 static const JSFunctionSpec string_methods[] = {
3855 #if JS_HAS_TOSOURCE
3856 JS_FN("quote", str_quote, 0,JSFUN_GENERIC_NATIVE),
3857 JS_FN(js_toSource_str, str_toSource, 0,0),
3858 #endif
3860 /* Java-like methods. */
3861 JS_FN(js_toString_str, js_str_toString, 0,0),
/* valueOf shares the toString native. */
3862 JS_FN(js_valueOf_str, js_str_toString, 0,0),
3863 JS_FN("substring", str_substring, 2,JSFUN_GENERIC_NATIVE),
3864 JS_FN("toLowerCase", str_toLowerCase, 0,JSFUN_GENERIC_NATIVE),
3865 JS_FN("toUpperCase", str_toUpperCase, 0,JSFUN_GENERIC_NATIVE),
3866 JS_FN("charAt", js_str_charAt, 1,JSFUN_GENERIC_NATIVE),
3867 JS_FN("charCodeAt", js_str_charCodeAt, 1,JSFUN_GENERIC_NATIVE),
3868 JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1,0),
3869 JS_FN("contains", str_contains, 1,JSFUN_GENERIC_NATIVE),
3870 JS_FN("indexOf", str_indexOf, 1,JSFUN_GENERIC_NATIVE),
3871 JS_FN("lastIndexOf", str_lastIndexOf, 1,JSFUN_GENERIC_NATIVE),
3872 JS_FN("startsWith", str_startsWith, 1,JSFUN_GENERIC_NATIVE),
3873 JS_FN("endsWith", str_endsWith, 1,JSFUN_GENERIC_NATIVE),
3874 JS_FN("trim", str_trim, 0,JSFUN_GENERIC_NATIVE),
3875 JS_FN("trimLeft", str_trimLeft, 0,JSFUN_GENERIC_NATIVE),
3876 JS_FN("trimRight", str_trimRight, 0,JSFUN_GENERIC_NATIVE),
3877 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,JSFUN_GENERIC_NATIVE),
3878 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,JSFUN_GENERIC_NATIVE),
/* localeCompare is self-hosted when EXPOSE_INTL_API is set, native otherwise. */
3879 #if EXPOSE_INTL_API
3880 JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1,0),
3881 #else
3882 JS_FN("localeCompare", str_localeCompare, 1,JSFUN_GENERIC_NATIVE),
3883 #endif
3884 JS_SELF_HOSTED_FN("repeat", "String_repeat", 1,0),
/* normalize is only present when EXPOSE_INTL_API is set. */
3885 #if EXPOSE_INTL_API
3886 JS_FN("normalize", str_normalize, 0,JSFUN_GENERIC_NATIVE),
3887 #endif
3889 /* Perl-ish methods (search is actually Python-esque). */
3890 JS_FN("match", str_match, 1,JSFUN_GENERIC_NATIVE),
3891 JS_FN("search", str_search, 1,JSFUN_GENERIC_NATIVE),
3892 JS_FN("replace", str_replace, 2,JSFUN_GENERIC_NATIVE),
3893 JS_FN("split", str_split, 2,JSFUN_GENERIC_NATIVE),
3894 JS_FN("substr", str_substr, 2,JSFUN_GENERIC_NATIVE),
3896 /* Python-esque sequence methods. */
3897 JS_FN("concat", str_concat, 1,JSFUN_GENERIC_NATIVE),
3898 JS_FN("slice", str_slice, 2,JSFUN_GENERIC_NATIVE),
3900 /* HTML string methods. */
3901 #if JS_HAS_STR_HTML_HELPERS
3902 JS_FN("bold", str_bold, 0,0),
3903 JS_FN("italics", str_italics, 0,0),
3904 JS_FN("fixed", str_fixed, 0,0),
3905 JS_FN("fontsize", str_fontsize, 1,0),
3906 JS_FN("fontcolor", str_fontcolor, 1,0),
3907 JS_FN("link", str_link, 1,0),
3908 JS_FN("anchor", str_anchor, 1,0),
3909 JS_FN("strike", str_strike, 0,0),
3910 JS_FN("small", str_small, 0,0),
3911 JS_FN("big", str_big, 0,0),
3912 JS_FN("blink", str_blink, 0,0),
3913 JS_FN("sup", str_sup, 0,0),
3914 JS_FN("sub", str_sub, 0,0),
3915 #endif
3916 JS_SELF_HOSTED_FN("@@iterator", "String_iterator", 0,0),
3917 JS_FS_END
3918 };
3920 bool
3921 js_String(JSContext *cx, unsigned argc, Value *vp)
3922 {
3923 CallArgs args = CallArgsFromVp(argc, vp);
3925 RootedString str(cx);
3926 if (args.length() > 0) {
3927 str = ToString<CanGC>(cx, args[0]);
3928 if (!str)
3929 return false;
3930 } else {
3931 str = cx->runtime()->emptyString;
3932 }
3934 if (args.isConstructing()) {
3935 StringObject *strobj = StringObject::create(cx, str);
3936 if (!strobj)
3937 return false;
3938 args.rval().setObject(*strobj);
3939 return true;
3940 }
3942 args.rval().setString(str);
3943 return true;
3944 }
3946 bool
3947 js::str_fromCharCode(JSContext *cx, unsigned argc, Value *vp)
3948 {
3949 CallArgs args = CallArgsFromVp(argc, vp);
3951 JS_ASSERT(args.length() <= ARGS_LENGTH_MAX);
3952 if (args.length() == 1) {
3953 uint16_t code;
3954 if (!ToUint16(cx, args[0], &code))
3955 return false;
3956 if (StaticStrings::hasUnit(code)) {
3957 args.rval().setString(cx->staticStrings().getUnit(code));
3958 return true;
3959 }
3960 args[0].setInt32(code);
3961 }
3962 jschar *chars = cx->pod_malloc<jschar>(args.length() + 1);
3963 if (!chars)
3964 return false;
3965 for (unsigned i = 0; i < args.length(); i++) {
3966 uint16_t code;
3967 if (!ToUint16(cx, args[i], &code)) {
3968 js_free(chars);
3969 return false;
3970 }
3971 chars[i] = (jschar)code;
3972 }
3973 chars[args.length()] = 0;
3974 JSString *str = js_NewString<CanGC>(cx, chars, args.length());
3975 if (!str) {
3976 js_free(chars);
3977 return false;
3978 }
3980 args.rval().setString(str);
3981 return true;
3982 }
/*
 * Function table for the String constructor itself: js_InitStringClass
 * passes it to DefinePropertiesAndBrand together with the constructor.
 * fromCharCode is native; the other entries are self-hosted, and the static
 * localeCompare is only present when EXPOSE_INTL_API is set.
 */
3984 static const JSFunctionSpec string_static_methods[] = {
3985 JS_FN("fromCharCode", js::str_fromCharCode, 1, 0),
3986 JS_SELF_HOSTED_FN("fromCodePoint", "String_static_fromCodePoint", 0,0),
3988 // This must be at the end because of bug 853075: functions listed after
3989 // self-hosted methods aren't available in self-hosted code.
3990 #if EXPOSE_INTL_API
3991 JS_SELF_HOSTED_FN("localeCompare", "String_static_localeCompare", 2,0),
3992 #endif
3993 JS_FS_END
3994 };
3996 /* static */ Shape *
3997 StringObject::assignInitialShape(ExclusiveContext *cx, Handle<StringObject*> obj)
3998 {
3999 JS_ASSERT(obj->nativeEmpty());
4001 return obj->addDataProperty(cx, cx->names().length, LENGTH_SLOT,
4002 JSPROP_PERMANENT | JSPROP_READONLY);
4003 }
4005 JSObject *
4006 js_InitStringClass(JSContext *cx, HandleObject obj)
4007 {
4008 JS_ASSERT(obj->isNative());
4010 Rooted<GlobalObject*> global(cx, &obj->as<GlobalObject>());
4012 Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
4013 RootedObject proto(cx, global->createBlankPrototype(cx, &StringObject::class_));
4014 if (!proto || !proto->as<StringObject>().init(cx, empty))
4015 return nullptr;
4017 /* Now create the String function. */
4018 RootedFunction ctor(cx);
4019 ctor = global->createConstructor(cx, js_String, cx->names().String, 1);
4020 if (!ctor)
4021 return nullptr;
4023 if (!LinkConstructorAndPrototype(cx, ctor, proto))
4024 return nullptr;
4026 if (!DefinePropertiesAndBrand(cx, proto, nullptr, string_methods) ||
4027 !DefinePropertiesAndBrand(cx, ctor, nullptr, string_static_methods))
4028 {
4029 return nullptr;
4030 }
4032 if (!GlobalObject::initBuiltinConstructor(cx, global, JSProto_String, ctor, proto))
4033 return nullptr;
4035 /*
4036 * Define escape/unescape, the URI encode/decode functions, and maybe
4037 * uneval on the global object.
4038 */
4039 if (!JS_DefineFunctions(cx, global, string_functions))
4040 return nullptr;
4042 return proto;
4043 }
4045 template <AllowGC allowGC>
4046 JSFlatString *
4047 js_NewString(ThreadSafeContext *cx, jschar *chars, size_t length)
4048 {
4049 if (length == 1) {
4050 jschar c = chars[0];
4051 if (StaticStrings::hasUnit(c)) {
4052 // Free |chars| because we're taking possession of it, but it's no
4053 // longer needed because we use the static string instead.
4054 js_free(chars);
4055 return cx->staticStrings().getUnit(c);
4056 }
4057 }
4059 return JSFlatString::new_<allowGC>(cx, chars, length);
4060 }
4062 template JSFlatString *
4063 js_NewString<CanGC>(ThreadSafeContext *cx, jschar *chars, size_t length);
4065 template JSFlatString *
4066 js_NewString<NoGC>(ThreadSafeContext *cx, jschar *chars, size_t length);
4068 JSLinearString *
4069 js_NewDependentString(JSContext *cx, JSString *baseArg, size_t start, size_t length)
4070 {
4071 if (length == 0)
4072 return cx->emptyString();
4074 JSLinearString *base = baseArg->ensureLinear(cx);
4075 if (!base)
4076 return nullptr;
4078 if (start == 0 && length == base->length())
4079 return base;
4081 const jschar *chars = base->chars() + start;
4083 if (JSLinearString *staticStr = cx->staticStrings().lookup(chars, length))
4084 return staticStr;
4086 return JSDependentString::new_(cx, base, chars, length);
4087 }
4089 template <AllowGC allowGC>
4090 JSFlatString *
4091 js_NewStringCopyN(ExclusiveContext *cx, const jschar *s, size_t n)
4092 {
4093 if (JSFatInlineString::lengthFits(n))
4094 return NewFatInlineString<allowGC>(cx, TwoByteChars(s, n));
4096 jschar *news = cx->pod_malloc<jschar>(n + 1);
4097 if (!news)
4098 return nullptr;
4099 js_strncpy(news, s, n);
4100 news[n] = 0;
4101 JSFlatString *str = js_NewString<allowGC>(cx, news, n);
4102 if (!str)
4103 js_free(news);
4104 return str;
4105 }
4107 template JSFlatString *
4108 js_NewStringCopyN<CanGC>(ExclusiveContext *cx, const jschar *s, size_t n);
4110 template JSFlatString *
4111 js_NewStringCopyN<NoGC>(ExclusiveContext *cx, const jschar *s, size_t n);
4113 template <AllowGC allowGC>
4114 JSFlatString *
4115 js_NewStringCopyN(ThreadSafeContext *cx, const char *s, size_t n)
4116 {
4117 if (JSFatInlineString::lengthFits(n))
4118 return NewFatInlineString<allowGC>(cx, JS::Latin1Chars(s, n));
4120 jschar *chars = InflateString(cx, s, &n);
4121 if (!chars)
4122 return nullptr;
4123 JSFlatString *str = js_NewString<allowGC>(cx, chars, n);
4124 if (!str)
4125 js_free(chars);
4126 return str;
4127 }
4129 template JSFlatString *
4130 js_NewStringCopyN<CanGC>(ThreadSafeContext *cx, const char *s, size_t n);
4132 template JSFlatString *
4133 js_NewStringCopyN<NoGC>(ThreadSafeContext *cx, const char *s, size_t n);
4135 template <AllowGC allowGC>
4136 JSFlatString *
4137 js_NewStringCopyZ(ExclusiveContext *cx, const jschar *s)
4138 {
4139 size_t n = js_strlen(s);
4140 if (JSFatInlineString::lengthFits(n))
4141 return NewFatInlineString<allowGC>(cx, TwoByteChars(s, n));
4143 size_t m = (n + 1) * sizeof(jschar);
4144 jschar *news = (jschar *) cx->malloc_(m);
4145 if (!news)
4146 return nullptr;
4147 js_memcpy(news, s, m);
4148 JSFlatString *str = js_NewString<allowGC>(cx, news, n);
4149 if (!str)
4150 js_free(news);
4151 return str;
4152 }
4154 template JSFlatString *
4155 js_NewStringCopyZ<CanGC>(ExclusiveContext *cx, const jschar *s);
4157 template JSFlatString *
4158 js_NewStringCopyZ<NoGC>(ExclusiveContext *cx, const jschar *s);
4160 template <AllowGC allowGC>
4161 JSFlatString *
4162 js_NewStringCopyZ(ThreadSafeContext *cx, const char *s)
4163 {
4164 return js_NewStringCopyN<allowGC>(cx, s, strlen(s));
4165 }
4167 template JSFlatString *
4168 js_NewStringCopyZ<CanGC>(ThreadSafeContext *cx, const char *s);
4170 template JSFlatString *
4171 js_NewStringCopyZ<NoGC>(ThreadSafeContext *cx, const char *s);
4173 const char *
4174 js_ValueToPrintable(JSContext *cx, const Value &vArg, JSAutoByteString *bytes, bool asSource)
4175 {
4176 RootedValue v(cx, vArg);
4177 JSString *str;
4178 if (asSource)
4179 str = ValueToSource(cx, v);
4180 else
4181 str = ToString<CanGC>(cx, v);
4182 if (!str)
4183 return nullptr;
4184 str = js_QuoteString(cx, str, 0);
4185 if (!str)
4186 return nullptr;
4187 return bytes->encodeLatin1(cx, str);
4188 }
4190 template <AllowGC allowGC>
4191 JSString *
4192 js::ToStringSlow(ExclusiveContext *cx, typename MaybeRooted<Value, allowGC>::HandleType arg)
4193 {
4194 /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
4195 JS_ASSERT(!arg.isString());
4197 Value v = arg;
4198 if (!v.isPrimitive()) {
4199 if (!cx->shouldBeJSContext() || !allowGC)
4200 return nullptr;
4201 RootedValue v2(cx, v);
4202 if (!ToPrimitive(cx->asJSContext(), JSTYPE_STRING, &v2))
4203 return nullptr;
4204 v = v2;
4205 }
4207 JSString *str;
4208 if (v.isString()) {
4209 str = v.toString();
4210 } else if (v.isInt32()) {
4211 str = Int32ToString<allowGC>(cx, v.toInt32());
4212 } else if (v.isDouble()) {
4213 str = NumberToString<allowGC>(cx, v.toDouble());
4214 } else if (v.isBoolean()) {
4215 str = js_BooleanToString(cx, v.toBoolean());
4216 } else if (v.isNull()) {
4217 str = cx->names().null;
4218 } else {
4219 str = cx->names().undefined;
4220 }
4221 return str;
4222 }
4224 template JSString *
4225 js::ToStringSlow<CanGC>(ExclusiveContext *cx, HandleValue arg);
4227 template JSString *
4228 js::ToStringSlow<NoGC>(ExclusiveContext *cx, Value arg);
4230 JS_PUBLIC_API(JSString *)
4231 js::ToStringSlow(JSContext *cx, HandleValue v)
4232 {
4233 return ToStringSlow<CanGC>(cx, v);
4234 }
4236 JSString *
4237 js::ValueToSource(JSContext *cx, HandleValue v)
4238 {
4239 JS_CHECK_RECURSION(cx, return nullptr);
4240 assertSameCompartment(cx, v);
4242 if (v.isUndefined())
4243 return cx->names().void0;
4244 if (v.isString())
4245 return StringToSource(cx, v.toString());
4246 if (v.isPrimitive()) {
4247 /* Special case to preserve negative zero, _contra_ toString. */
4248 if (v.isDouble() && IsNegativeZero(v.toDouble())) {
4249 /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
4250 static const jschar js_negzero_ucNstr[] = {'-', '0'};
4252 return js_NewStringCopyN<CanGC>(cx, js_negzero_ucNstr, 2);
4253 }
4254 return ToString<CanGC>(cx, v);
4255 }
4257 RootedValue fval(cx);
4258 RootedObject obj(cx, &v.toObject());
4259 if (!JSObject::getProperty(cx, obj, obj, cx->names().toSource, &fval))
4260 return nullptr;
4261 if (js_IsCallable(fval)) {
4262 RootedValue rval(cx);
4263 if (!Invoke(cx, ObjectValue(*obj), fval, 0, nullptr, &rval))
4264 return nullptr;
4265 return ToString<CanGC>(cx, rval);
4266 }
4268 return ObjectToSource(cx, obj);
4269 }
4271 JSString *
4272 js::StringToSource(JSContext *cx, JSString *str)
4273 {
4274 return js_QuoteString(cx, str, '"');
4275 }
4277 bool
4278 js::EqualStrings(JSContext *cx, JSString *str1, JSString *str2, bool *result)
4279 {
4280 if (str1 == str2) {
4281 *result = true;
4282 return true;
4283 }
4285 size_t length1 = str1->length();
4286 if (length1 != str2->length()) {
4287 *result = false;
4288 return true;
4289 }
4291 JSLinearString *linear1 = str1->ensureLinear(cx);
4292 if (!linear1)
4293 return false;
4294 JSLinearString *linear2 = str2->ensureLinear(cx);
4295 if (!linear2)
4296 return false;
4298 *result = PodEqual(linear1->chars(), linear2->chars(), length1);
4299 return true;
4300 }
4302 bool
4303 js::EqualStrings(JSLinearString *str1, JSLinearString *str2)
4304 {
4305 if (str1 == str2)
4306 return true;
4308 size_t length1 = str1->length();
4309 if (length1 != str2->length())
4310 return false;
4312 return PodEqual(str1->chars(), str2->chars(), length1);
4313 }
4315 static bool
4316 CompareStringsImpl(JSContext *cx, JSString *str1, JSString *str2, int32_t *result)
4317 {
4318 JS_ASSERT(str1);
4319 JS_ASSERT(str2);
4321 if (str1 == str2) {
4322 *result = 0;
4323 return true;
4324 }
4326 const jschar *s1 = str1->getChars(cx);
4327 if (!s1)
4328 return false;
4330 const jschar *s2 = str2->getChars(cx);
4331 if (!s2)
4332 return false;
4334 *result = CompareChars(s1, str1->length(), s2, str2->length());
4335 return true;
4336 }
4338 bool
4339 js::CompareStrings(JSContext *cx, JSString *str1, JSString *str2, int32_t *result)
4340 {
4341 return CompareStringsImpl(cx, str1, str2, result);
4342 }
4344 int32_t
4345 js::CompareAtoms(JSAtom *atom1, JSAtom *atom2)
4346 {
4347 return CompareChars(atom1->chars(), atom1->length(), atom2->chars(), atom2->length());
4348 }
4350 bool
4351 js::StringEqualsAscii(JSLinearString *str, const char *asciiBytes)
4352 {
4353 size_t length = strlen(asciiBytes);
4354 #ifdef DEBUG
4355 for (size_t i = 0; i != length; ++i)
4356 JS_ASSERT(unsigned(asciiBytes[i]) <= 127);
4357 #endif
4358 if (length != str->length())
4359 return false;
4360 const jschar *chars = str->chars();
4361 for (size_t i = 0; i != length; ++i) {
4362 if (unsigned(asciiBytes[i]) != unsigned(chars[i]))
4363 return false;
4364 }
4365 return true;
4366 }
4368 size_t
4369 js_strlen(const jschar *s)
4370 {
4371 const jschar *t;
4373 for (t = s; *t != 0; t++)
4374 continue;
4375 return (size_t)(t - s);
4376 }
4378 int32_t
4379 js_strcmp(const jschar *lhs, const jschar *rhs)
4380 {
4381 while (true) {
4382 if (*lhs != *rhs)
4383 return int32_t(*lhs) - int32_t(*rhs);
4384 if (*lhs == 0)
4385 return 0;
4386 ++lhs, ++rhs;
4387 }
4388 }
4390 jschar *
4391 js_strdup(js::ThreadSafeContext *cx, const jschar *s)
4392 {
4393 size_t n = js_strlen(s);
4394 jschar *ret = cx->pod_malloc<jschar>(n + 1);
4395 if (!ret)
4396 return nullptr;
4397 js_strncpy(ret, s, n);
4398 ret[n] = '\0';
4399 return ret;
4400 }
4402 jschar *
4403 js_strchr_limit(const jschar *s, jschar c, const jschar *limit)
4404 {
4405 while (s < limit) {
4406 if (*s == c)
4407 return (jschar *)s;
4408 s++;
4409 }
4410 return nullptr;
4411 }
4413 jschar *
4414 js::InflateString(ThreadSafeContext *cx, const char *bytes, size_t *lengthp)
4415 {
4416 size_t nchars;
4417 jschar *chars;
4418 size_t nbytes = *lengthp;
4420 nchars = nbytes;
4421 chars = cx->pod_malloc<jschar>(nchars + 1);
4422 if (!chars)
4423 goto bad;
4424 for (size_t i = 0; i < nchars; i++)
4425 chars[i] = (unsigned char) bytes[i];
4426 *lengthp = nchars;
4427 chars[nchars] = 0;
4428 return chars;
4430 bad:
4431 // For compatibility with callers of JS_DecodeBytes we must zero lengthp
4432 // on errors.
4433 *lengthp = 0;
4434 return nullptr;
4435 }
4437 bool
4438 js::DeflateStringToBuffer(JSContext *maybecx, const jschar *src, size_t srclen,
4439 char *dst, size_t *dstlenp)
4440 {
4441 size_t dstlen = *dstlenp;
4442 if (srclen > dstlen) {
4443 for (size_t i = 0; i < dstlen; i++)
4444 dst[i] = (char) src[i];
4445 if (maybecx) {
4446 AutoSuppressGC suppress(maybecx);
4447 JS_ReportErrorNumber(maybecx, js_GetErrorMessage, nullptr,
4448 JSMSG_BUFFER_TOO_SMALL);
4449 }
4450 return false;
4451 }
4452 for (size_t i = 0; i < srclen; i++)
4453 dst[i] = (char) src[i];
4454 *dstlenp = srclen;
4455 return true;
4456 }
4458 #define ____ false
4460 /*
 * Identifier start chars:
 * -      36: $
 * -  65..90: A..Z
 * -      95: _
 * - 97..122: a..z
 *
 * The table has 128 entries (twelve rows of ten and a final row of eight).
 * An entry's position is its row label times ten plus its column number, and
 * the true entries sit at the character codes listed above. ____ is a macro
 * for false that is defined while these tables are being declared.
 */
4467 const bool js_isidstart[] = {
4468 /*          0     1     2     3     4     5     6     7     8     9  */
4469 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4470 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4471 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4472 /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
4473 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4474 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4475 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4476 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4477 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4478 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4479 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4480 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4481 /* 12 */ true, true, true, ____, ____, ____, ____, ____
4482 };
4484 /*
 * Identifier chars:
 * -      36: $
 * -  48..57: 0..9
 * -  65..90: A..Z
 * -      95: _
 * - 97..122: a..z
 *
 * The table has 128 entries (twelve rows of ten and a final row of eight).
 * An entry's position is its row label times ten plus its column number, and
 * the true entries sit at the character codes listed above. ____ is a macro
 * for false that is defined while these tables are being declared.
 */
4492 const bool js_isident[] = {
4493 /*          0     1     2     3     4     5     6     7     8     9  */
4494 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4495 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4496 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4497 /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
4498 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
4499 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
4500 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4501 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4502 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4503 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4504 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4505 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4506 /* 12 */ true, true, true, ____, ____, ____, ____, ____
4507 };
4509 /*
 * Whitespace chars: '\t', '\n', '\v', '\f', '\r', ' '.
 *
 * The table has 128 entries (twelve rows of ten and a final row of eight).
 * An entry's position is its row label times ten plus its column number; the
 * true entries are at positions 9 through 13 and 32. ____ is a macro for
 * false that is defined while these tables are being declared.
 */
4510 const bool js_isspace[] = {
4511 /*          0     1     2     3     4     5     6     7     8     9  */
4512 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, true,
4513 /* 1 */ true, true, true, true, ____, ____, ____, ____, ____, ____,
4514 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4515 /* 3 */ ____, ____, true, ____, ____, ____, ____, ____, ____, ____,
4516 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4517 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4518 /* 6 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4519 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4520 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4521 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4522 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4523 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4524 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
4525 };
4527 /*
 * Uri reserved chars + #:
 * - 35: #
 * - 36: $
 * - 38: &
 * - 43: +
 * - 44: ,
 * - 47: /
 * - 58: :
 * - 59: ;
 * - 61: =
 * - 63: ?
 * - 64: @
 *
 * The table has 128 entries (twelve rows of ten and a final row of eight).
 * An entry's position is its row label times ten plus its column number, and
 * the true entries sit at the character codes listed above. ____ is a macro
 * for false that is defined while these tables are being declared.
 */
4541 static const bool js_isUriReservedPlusPound[] = {
4542 /*          0     1     2     3     4     5     6     7     8     9  */
4543 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4544 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4545 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4546 /* 3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
4547 /* 4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
4548 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
4549 /* 6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
4550 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4551 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4552 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4553 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4554 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4555 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
4556 };
4558 /*
4559 * Uri unescaped chars:
4560 * - 33: !
4561 * - 39: '
4562 * - 40: (
4563 * - 41: )
4564 * - 42: *
4565 * - 45: -
4566 * - 46: .
4567 * - 48..57: 0-9
4568 * - 65..90: A-Z
4569 * - 95: _
4570 * - 97..122: a-z
4571 * - 126: ~
4572 */
4573 static const bool js_isUriUnescaped[] = {
4574 /* 0 1 2 3 4 5 6 7 8 9 */
4575 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4576 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4577 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4578 /* 3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
4579 /* 4 */ true, true, true, ____, ____, true, true, ____, true, true,
4580 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
4581 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4582 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4583 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4584 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4585 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4586 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4587 /* 12 */ true, true, true, ____, ____, ____, true, ____
4588 };
/* Retire the table-filler macro so it cannot leak into the code that follows. */
#undef ____

/*
 * NOTE(review): URI_CHUNK is not referenced by the Encode/Decode code that
 * immediately follows -- confirm whether it is still needed.
 */
#define URI_CHUNK 64U
4594 static inline bool
4595 TransferBufferToString(StringBuffer &sb, MutableHandleValue rval)
4596 {
4597 JSString *str = sb.finishString();
4598 if (!str)
4599 return false;
4600 rval.setString(str);
4601 return true;
4602 }
4604 /*
4605 * ECMA 3, 15.1.3 URI Handling Function Properties
4606 *
4607 * The following are implementations of the algorithms
4608 * given in the ECMA specification for the hidden functions
4609 * 'Encode' and 'Decode'.
4610 */
4611 static bool
4612 Encode(JSContext *cx, Handle<JSLinearString*> str, const bool *unescapedSet,
4613 const bool *unescapedSet2, MutableHandleValue rval)
4614 {
4615 static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
4617 size_t length = str->length();
4618 if (length == 0) {
4619 rval.setString(cx->runtime()->emptyString);
4620 return true;
4621 }
4623 const jschar *chars = str->chars();
4624 StringBuffer sb(cx);
4625 if (!sb.reserve(length))
4626 return false;
4627 jschar hexBuf[4];
4628 hexBuf[0] = '%';
4629 hexBuf[3] = 0;
4630 for (size_t k = 0; k < length; k++) {
4631 jschar c = chars[k];
4632 if (c < 128 && (unescapedSet[c] || (unescapedSet2 && unescapedSet2[c]))) {
4633 if (!sb.append(c))
4634 return false;
4635 } else {
4636 if ((c >= 0xDC00) && (c <= 0xDFFF)) {
4637 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_BAD_URI, nullptr);
4638 return false;
4639 }
4640 uint32_t v;
4641 if (c < 0xD800 || c > 0xDBFF) {
4642 v = c;
4643 } else {
4644 k++;
4645 if (k == length) {
4646 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr,
4647 JSMSG_BAD_URI, nullptr);
4648 return false;
4649 }
4650 jschar c2 = chars[k];
4651 if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {
4652 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr,
4653 JSMSG_BAD_URI, nullptr);
4654 return false;
4655 }
4656 v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
4657 }
4658 uint8_t utf8buf[4];
4659 size_t L = js_OneUcs4ToUtf8Char(utf8buf, v);
4660 for (size_t j = 0; j < L; j++) {
4661 hexBuf[1] = HexDigits[utf8buf[j] >> 4];
4662 hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
4663 if (!sb.append(hexBuf, 3))
4664 return false;
4665 }
4666 }
4667 }
4669 return TransferBufferToString(sb, rval);
4670 }
4672 static bool
4673 Decode(JSContext *cx, Handle<JSLinearString*> str, const bool *reservedSet, MutableHandleValue rval)
4674 {
4675 size_t length = str->length();
4676 if (length == 0) {
4677 rval.setString(cx->runtime()->emptyString);
4678 return true;
4679 }
4681 const jschar *chars = str->chars();
4682 StringBuffer sb(cx);
4683 for (size_t k = 0; k < length; k++) {
4684 jschar c = chars[k];
4685 if (c == '%') {
4686 size_t start = k;
4687 if ((k + 2) >= length)
4688 goto report_bad_uri;
4689 if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4690 goto report_bad_uri;
4691 uint32_t B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4692 k += 2;
4693 if (!(B & 0x80)) {
4694 c = (jschar)B;
4695 } else {
4696 int n = 1;
4697 while (B & (0x80 >> n))
4698 n++;
4699 if (n == 1 || n > 4)
4700 goto report_bad_uri;
4701 uint8_t octets[4];
4702 octets[0] = (uint8_t)B;
4703 if (k + 3 * (n - 1) >= length)
4704 goto report_bad_uri;
4705 for (int j = 1; j < n; j++) {
4706 k++;
4707 if (chars[k] != '%')
4708 goto report_bad_uri;
4709 if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4710 goto report_bad_uri;
4711 B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4712 if ((B & 0xC0) != 0x80)
4713 goto report_bad_uri;
4714 k += 2;
4715 octets[j] = (char)B;
4716 }
4717 uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
4718 if (v >= 0x10000) {
4719 v -= 0x10000;
4720 if (v > 0xFFFFF)
4721 goto report_bad_uri;
4722 c = (jschar)((v & 0x3FF) + 0xDC00);
4723 jschar H = (jschar)((v >> 10) + 0xD800);
4724 if (!sb.append(H))
4725 return false;
4726 } else {
4727 c = (jschar)v;
4728 }
4729 }
4730 if (c < 128 && reservedSet && reservedSet[c]) {
4731 if (!sb.append(chars + start, k - start + 1))
4732 return false;
4733 } else {
4734 if (!sb.append(c))
4735 return false;
4736 }
4737 } else {
4738 if (!sb.append(c))
4739 return false;
4740 }
4741 }
4743 return TransferBufferToString(sb, rval);
4745 report_bad_uri:
4746 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_BAD_URI);
4747 /* FALL THROUGH */
4749 return false;
4750 }
4752 static bool
4753 str_decodeURI(JSContext *cx, unsigned argc, Value *vp)
4754 {
4755 CallArgs args = CallArgsFromVp(argc, vp);
4756 Rooted<JSLinearString*> str(cx, ArgToRootedString(cx, args, 0));
4757 if (!str)
4758 return false;
4760 return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
4761 }
4763 static bool
4764 str_decodeURI_Component(JSContext *cx, unsigned argc, Value *vp)
4765 {
4766 CallArgs args = CallArgsFromVp(argc, vp);
4767 Rooted<JSLinearString*> str(cx, ArgToRootedString(cx, args, 0));
4768 if (!str)
4769 return false;
4771 return Decode(cx, str, nullptr, args.rval());
4772 }
4774 static bool
4775 str_encodeURI(JSContext *cx, unsigned argc, Value *vp)
4776 {
4777 CallArgs args = CallArgsFromVp(argc, vp);
4778 Rooted<JSLinearString*> str(cx, ArgToRootedString(cx, args, 0));
4779 if (!str)
4780 return false;
4782 return Encode(cx, str, js_isUriUnescaped, js_isUriReservedPlusPound, args.rval());
4783 }
4785 static bool
4786 str_encodeURI_Component(JSContext *cx, unsigned argc, Value *vp)
4787 {
4788 CallArgs args = CallArgsFromVp(argc, vp);
4789 Rooted<JSLinearString*> str(cx, ArgToRootedString(cx, args, 0));
4790 if (!str)
4791 return false;
4793 return Encode(cx, str, js_isUriUnescaped, nullptr, args.rval());
4794 }
/*
 * Encode the single UCS-4 code point |ucs4Char| as UTF-8 into |utf8Buffer|,
 * which must have room for at least 4 bytes. The code point is asserted to be
 * at most 0x10FFFF. Returns the number of bytes written.
 */
int
js_OneUcs4ToUtf8Char(uint8_t *utf8Buffer, uint32_t ucs4Char)
{
    JS_ASSERT(ucs4Char <= 0x10FFFF);

    /* ASCII is stored as a single byte. */
    if (ucs4Char < 0x80) {
        utf8Buffer[0] = (uint8_t)ucs4Char;
        return 1;
    }

    /* Two bytes hold 11 payload bits; each additional byte adds 5 more. */
    int byteCount = 2;
    for (uint32_t rest = ucs4Char >> 11; rest != 0; rest >>= 5)
        byteCount++;

    /* Fill the continuation bytes back to front, 6 payload bits apiece. */
    for (int pos = byteCount - 1; pos > 0; pos--) {
        utf8Buffer[pos] = (uint8_t)(0x80 | (ucs4Char & 0x3F));
        ucs4Char >>= 6;
    }

    /* Lead byte: |byteCount| one bits, a zero bit, then the remaining payload. */
    utf8Buffer[0] = (uint8_t)(0x100 - (1 << (8 - byteCount)) + ucs4Char);
    return byteCount;
}
4826 size_t
4827 js::PutEscapedStringImpl(char *buffer, size_t bufferSize, FILE *fp, JSLinearString *str,
4828 uint32_t quote)
4829 {
4830 return PutEscapedStringImpl(buffer, bufferSize, fp, str->chars(),
4831 str->length(), quote);
4832 }
4834 size_t
4835 js::PutEscapedStringImpl(char *buffer, size_t bufferSize, FILE *fp, const jschar *chars,
4836 size_t length, uint32_t quote)
4837 {
4838 enum {
4839 STOP, FIRST_QUOTE, LAST_QUOTE, CHARS, ESCAPE_START, ESCAPE_MORE
4840 } state;
4842 JS_ASSERT(quote == 0 || quote == '\'' || quote == '"');
4843 JS_ASSERT_IF(!buffer, bufferSize == 0);
4844 JS_ASSERT_IF(fp, !buffer);
4846 if (bufferSize == 0)
4847 buffer = nullptr;
4848 else
4849 bufferSize--;
4851 const jschar *charsEnd = chars + length;
4852 size_t n = 0;
4853 state = FIRST_QUOTE;
4854 unsigned shift = 0;
4855 unsigned hex = 0;
4856 unsigned u = 0;
4857 char c = 0; /* to quell GCC warnings */
4859 for (;;) {
4860 switch (state) {
4861 case STOP:
4862 goto stop;
4863 case FIRST_QUOTE:
4864 state = CHARS;
4865 goto do_quote;
4866 case LAST_QUOTE:
4867 state = STOP;
4868 do_quote:
4869 if (quote == 0)
4870 continue;
4871 c = (char)quote;
4872 break;
4873 case CHARS:
4874 if (chars == charsEnd) {
4875 state = LAST_QUOTE;
4876 continue;
4877 }
4878 u = *chars++;
4879 if (u < ' ') {
4880 if (u != 0) {
4881 const char *escape = strchr(js_EscapeMap, (int)u);
4882 if (escape) {
4883 u = escape[1];
4884 goto do_escape;
4885 }
4886 }
4887 goto do_hex_escape;
4888 }
4889 if (u < 127) {
4890 if (u == quote || u == '\\')
4891 goto do_escape;
4892 c = (char)u;
4893 } else if (u < 0x100) {
4894 goto do_hex_escape;
4895 } else {
4896 shift = 16;
4897 hex = u;
4898 u = 'u';
4899 goto do_escape;
4900 }
4901 break;
4902 do_hex_escape:
4903 shift = 8;
4904 hex = u;
4905 u = 'x';
4906 do_escape:
4907 c = '\\';
4908 state = ESCAPE_START;
4909 break;
4910 case ESCAPE_START:
4911 JS_ASSERT(' ' <= u && u < 127);
4912 c = (char)u;
4913 state = ESCAPE_MORE;
4914 break;
4915 case ESCAPE_MORE:
4916 if (shift == 0) {
4917 state = CHARS;
4918 continue;
4919 }
4920 shift -= 4;
4921 u = 0xF & (hex >> shift);
4922 c = (char)(u + (u < 10 ? '0' : 'A' - 10));
4923 break;
4924 }
4925 if (buffer) {
4926 JS_ASSERT(n <= bufferSize);
4927 if (n != bufferSize) {
4928 buffer[n] = c;
4929 } else {
4930 buffer[n] = '\0';
4931 buffer = nullptr;
4932 }
4933 } else if (fp) {
4934 if (fputc(c, fp) < 0)
4935 return size_t(-1);
4936 }
4937 n++;
4938 }
4939 stop:
4940 if (buffer)
4941 buffer[n] = '\0';
4942 return n;
4943 }