Issue #1302 - Add self-hosted implementation for string regex .matchAll

This resolves #1302.
pull/24/head
wolfbeast 3 years ago committed by Roy Tam
parent c12890f12c
commit c16e82c9b1
  1. 2
      js/public/Class.h
  2. 1
      js/src/builtin/RegExp.cpp
  3. 132
      js/src/builtin/RegExp.js
  4. 5
      js/src/builtin/SelfHostingDefines.h
  5. 27
      js/src/builtin/String.js
  6. 3
      js/src/jsapi.h
  7. 57
      js/src/jsiter.cpp
  8. 6
      js/src/jsiter.h
  9. 1
      js/src/jsstr.cpp
  10. 1
      js/src/vm/CommonPropertyNames.h
  11. 79
      js/src/vm/GlobalObject.cpp
  12. 8
      js/src/vm/GlobalObject.h
  13. 23
      js/src/vm/SelfHosting.cpp

@ -779,7 +779,7 @@ struct JSClass {
// application.
#define JSCLASS_GLOBAL_APPLICATION_SLOTS 5
#define JSCLASS_GLOBAL_SLOT_COUNT \
(JSCLASS_GLOBAL_APPLICATION_SLOTS + JSProto_LIMIT * 2 + 39)
(JSCLASS_GLOBAL_APPLICATION_SLOTS + JSProto_LIMIT * 2 + 40)
#define JSCLASS_GLOBAL_FLAGS_WITH_SLOTS(n) \
(JSCLASS_IS_GLOBAL | JSCLASS_HAS_RESERVED_SLOTS(JSCLASS_GLOBAL_SLOT_COUNT + (n)))
#define JSCLASS_GLOBAL_FLAGS \

@ -795,6 +795,7 @@ const JSFunctionSpec js::regexp_methods[] = {
JS_SELF_HOSTED_FN("exec", "RegExp_prototype_Exec", 1,0),
JS_SELF_HOSTED_FN("test", "RegExpTest" , 1,0),
JS_SELF_HOSTED_SYM_FN(match, "RegExpMatch", 1,0),
JS_SELF_HOSTED_SYM_FN(matchAll, "RegExpMatchAll", 1, 0),
JS_SELF_HOSTED_SYM_FN(replace, "RegExpReplace", 2,0),
JS_SELF_HOSTED_SYM_FN(search, "RegExpSearch", 1,0),
JS_SELF_HOSTED_SYM_FN(split, "RegExpSplit", 2,0),

@ -1031,3 +1031,135 @@ function RegExpSpecies() {
return this;
}
_SetCanonicalName(RegExpSpecies, "get [Symbol.species]");
// String.prototype.matchAll proposal.
//
// RegExp.prototype [ @@matchAll ] ( string )
//
// Builds a fresh matcher from the receiver via its species constructor and
// returns a RegExp String Iterator over |string|. Throws a TypeError when the
// receiver is not an object or when its flags string does not contain "g".
function RegExpMatchAll(string) {
    // Step 1.
    var rx = this;

    // Step 2.
    if (!IsObject(rx))
        ThrowTypeError(JSMSG_NOT_NONNULL_OBJECT, rx === null ? "null" : typeof rx);

    // Step 3.
    var str = ToString(string);

    // Step 4.
    var C = SpeciesConstructor(rx, GetBuiltinConstructor("RegExp"));

    // Step 5.
    var flagString = ToString(rx.flags);

    // Step 2.b.iii; located here because it needs the flags string.
    var hasGlobalFlag = callFunction(std_String_includes, flagString, "g");
    if (!hasGlobalFlag)
        ThrowTypeError(JSMSG_BAD_REGEXP_FLAG, "- matchAll requires g");

    // Step 6.
    var matcher = new C(rx, flagString);

    // Steps 7-8.
    matcher.lastIndex = ToLength(rx.lastIndex);

    // Steps 9-12.
    // The global bit is always set because a non-global receiver has already
    // thrown above, as per https://github.com/tc39/ecma262/pull/1716
    var iteratorFlags = REGEXP_GLOBAL_FLAG;
    if (callFunction(std_String_includes, flagString, "u"))
        iteratorFlags |= REGEXP_UNICODE_FLAG;

    // Step 13.
    return CreateRegExpStringIterator(matcher, str, iteratorFlags);
}
// String.prototype.matchAll proposal.
//
// CreateRegExpStringIterator ( R, S, global, fullUnicode )
//
// The spec's |global| and |fullUnicode| booleans are passed here as one
// |flags| bit mask built from REGEXP_GLOBAL_FLAG and REGEXP_UNICODE_FLAG.
function CreateRegExpStringIterator(regexp, string, flags) {
    // Step 1.
    assert(typeof string === "string", "|string| is a string value");

    // Steps 2-3.
    assert(typeof flags === "number", "|flags| is a number value");

    // Steps 4-9: allocate the iterator and fill in its reserved slots.
    var iter = NewRegExpStringIterator();
    var int32Flags = flags | 0;
    UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_REGEXP_SLOT, regexp);
    UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_STRING_SLOT, string);
    UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_FLAGS_SLOT, int32Flags);
    UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_DONE_SLOT, false);

    // Step 10.
    return iter;
}
// String.prototype.matchAll proposal.
//
// %RegExpStringIteratorPrototype%.next ( )
//
// Returns the next { value, done } iteration result. Once the underlying
// regexp stops matching, the iterator is marked done in its reserved slot and
// every later call reports done without running the regexp again.
function RegExpStringIteratorNext() {
    // Steps 1-3.
    var iter = IsObject(this) ? GuardToRegExpStringIterator(this) : null;
    if (iter === null) {
        return callFunction(CallRegExpStringIteratorMethodIfWrapped, this,
                            "RegExpStringIteratorNext");
    }

    var result = { value: undefined, done: false };

    // Step 4.
    if (UnsafeGetReservedSlot(iter, REGEXP_STRING_ITERATOR_DONE_SLOT)) {
        result.done = true;
        return result;
    }

    // Step 5.
    var regexp = UnsafeGetObjectFromReservedSlot(iter, REGEXP_STRING_ITERATOR_REGEXP_SLOT);

    // Step 6.
    var string = UnsafeGetStringFromReservedSlot(iter, REGEXP_STRING_ITERATOR_STRING_SLOT);

    // Steps 7-8: unpack the two booleans from the flags bit mask.
    var flagBits = UnsafeGetInt32FromReservedSlot(iter, REGEXP_STRING_ITERATOR_FLAGS_SLOT);
    var global = !!(flagBits & REGEXP_GLOBAL_FLAG);
    var fullUnicode = !!(flagBits & REGEXP_UNICODE_FLAG);

    // Step 9.
    var match = RegExpExec(regexp, string, false);

    // Step 10.
    if (match === null) {
        // Step 10.a.
        UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_DONE_SLOT, true);

        // Step 10.b.
        result.done = true;
        return result;
    }

    if (!global) {
        // Step 11.b.i.
        UnsafeSetReservedSlot(iter, REGEXP_STRING_ITERATOR_DONE_SLOT, true);
    } else if (ToString(match[0]).length === 0) {
        // Steps 11.a.i-ii: an empty match must advance lastIndex by hand,
        // otherwise the next exec would match at the same position again.
        // Step 11.a.ii.1.
        var thisIndex = ToLength(regexp.lastIndex);

        // Steps 11.a.ii.2-3.
        regexp.lastIndex = fullUnicode
                           ? AdvanceStringIndex(string, thisIndex)
                           : thisIndex + 1;
    }

    // Steps 11.a.iii and 11.b.ii.
    result.value = match;
    return result;
}

@ -92,6 +92,11 @@
#define REGEXP_UNICODE_FLAG 0x10
#define REGEXP_DOTALL_FLAG 0x20
// Reserved slot indices of RegExp String Iterator objects, used by the
// self-hosted matchAll code to read and write the iterator's state. The C++
// side checks with static_asserts that its slot enum matches these values.
#define REGEXP_STRING_ITERATOR_REGEXP_SLOT 0
#define REGEXP_STRING_ITERATOR_STRING_SLOT 1
#define REGEXP_STRING_ITERATOR_FLAGS_SLOT 2
#define REGEXP_STRING_ITERATOR_DONE_SLOT 3
#define MODULE_OBJECT_ENVIRONMENT_SLOT 2
#define MODULE_STATE_FAILED 0

@ -63,6 +63,33 @@ function String_generic_match(thisValue, regexp) {
return callFunction(String_match, thisValue, regexp);
}
// String.prototype.matchAll proposal.
//
// String.prototype.matchAll ( regexp )
//
// Delegates to regexp[@@matchAll] when |regexp| is neither undefined nor null
// and provides that method; otherwise creates a global RegExp from |regexp|
// and invokes its @@matchAll method on the stringified receiver.
function String_matchAll(regexp) {
    // Step 1.
    RequireObjectCoercible(this);

    // Step 2.
    var hasPattern = !(regexp === undefined || regexp === null);
    if (hasPattern) {
        // Step 2.a.
        var matcher = GetMethod(regexp, std_matchAll);

        // Step 2.b.
        if (matcher !== undefined)
            return callContentFunction(matcher, regexp, this);
    }

    // Step 3.
    var string = ToString(this);

    // Step 4.
    var rx = RegExpCreate(regexp, "g");

    // Step 5.
    var matchAllMethod = GetMethod(rx, std_matchAll);
    return callContentFunction(matchAllMethod, rx, string);
}
/**
* A helper function implementing the logic for both String.prototype.padStart
* and String.prototype.padEnd as described in ES7 Draft March 29, 2016

@ -5087,7 +5087,8 @@ GetSymbolDescription(HandleSymbol symbol);
macro(split) \
macro(toPrimitive) \
macro(toStringTag) \
macro(unscopables)
macro(unscopables) \
macro(matchAll)
enum class SymbolCode : uint32_t {
// There is one SymbolCode for each well-known symbol.

@ -24,6 +24,7 @@
#include "jstypes.h"
#include "jsutil.h"
#include "builtin/SelfHostingDefines.h"
#include "ds/Sort.h"
#include "gc/Marking.h"
#include "js/Proxy.h"
@ -1135,6 +1136,38 @@ static const JSFunctionSpec string_iterator_methods[] = {
JS_FS_END
};
// Class used for the RegExp String Iterator prototype object itself. Its
// flags word is 0, so unlike the iterator instances it reserves no slots.
static const Class RegExpStringIteratorPrototypeClass = {
"RegExp String Iterator",
0
};
// Reserved-slot layout of RegExp String Iterator instances. The trailing
// RegExpStringIteratorSlotCount entry is the number of slots, not a slot.
enum {
RegExpStringIteratorSlotRegExp,
RegExpStringIteratorSlotString,
RegExpStringIteratorSlotFlags,
RegExpStringIteratorSlotDone,
RegExpStringIteratorSlotCount
};
// Self-hosted code addresses the same slots through the
// REGEXP_STRING_ITERATOR_*_SLOT defines; fail the build if the two
// definitions ever drift apart.
static_assert(RegExpStringIteratorSlotRegExp == REGEXP_STRING_ITERATOR_REGEXP_SLOT,
"RegExpStringIteratorSlotRegExp must match self-hosting define for regexp slot.");
static_assert(RegExpStringIteratorSlotString == REGEXP_STRING_ITERATOR_STRING_SLOT,
"RegExpStringIteratorSlotString must match self-hosting define for string slot.");
static_assert(RegExpStringIteratorSlotFlags == REGEXP_STRING_ITERATOR_FLAGS_SLOT,
"RegExpStringIteratorSlotFlags must match self-hosting define for flags slot.");
static_assert(RegExpStringIteratorSlotDone == REGEXP_STRING_ITERATOR_DONE_SLOT,
"RegExpStringIteratorSlotDone must match self-hosting define for done slot.");
// Class of RegExp String Iterator instances; reserves
// RegExpStringIteratorSlotCount slots for the iterator's state.
const Class RegExpStringIteratorObject::class_ = {
"RegExp String Iterator",
JSCLASS_HAS_RESERVED_SLOTS(RegExpStringIteratorSlotCount)
};
// Methods defined on the RegExp String Iterator prototype. "next" is backed
// by the self-hosted function named "RegExpStringIteratorNext".
static const JSFunctionSpec regexp_string_iterator_methods[] = {
JS_SELF_HOSTED_FN("next", "RegExpStringIteratorNext", 0, 0),
JS_FS_END
};
JSObject*
js::ValueToIterator(JSContext* cx, unsigned flags, HandleValue vp)
{
@ -1541,6 +1574,30 @@ GlobalObject::initStringIteratorProto(JSContext* cx, Handle<GlobalObject*> globa
return true;
}
/* static */ bool
GlobalObject::initRegExpStringIteratorProto(JSContext* cx, Handle<GlobalObject*> global)
{
    // Nothing to do when the prototype is already cached on this global.
    if (global->getReservedSlot(REGEXP_STRING_ITERATOR_PROTO).isObject())
        return true;

    // The new prototype inherits from the global's iterator prototype.
    RootedObject iteratorProto(cx, GlobalObject::getOrCreateIteratorPrototype(cx, global));
    if (!iteratorProto)
        return false;

    RootedObject proto(cx, GlobalObject::createBlankPrototypeInheriting(
                               cx, global, &RegExpStringIteratorPrototypeClass, iteratorProto));
    if (!proto)
        return false;

    // Install the iterator methods, then the @@toStringTag property.
    if (!DefinePropertiesAndFunctions(cx, proto, nullptr, regexp_string_iterator_methods))
        return false;
    if (!DefineToStringTag(cx, proto, cx->names().RegExpStringIterator))
        return false;

    // Cache the finished prototype only after every step has succeeded.
    global->setReservedSlot(REGEXP_STRING_ITERATOR_PROTO, ObjectValue(*proto));
    return true;
}
JSObject*
js::InitLegacyIteratorClass(JSContext* cx, HandleObject obj)
{

@ -151,6 +151,12 @@ class StringIteratorObject : public JSObject
static const Class class_;
};
// Object type of RegExp String Iterator instances. It declares only the
// static Class (class_) that such objects are created with.
class RegExpStringIteratorObject : public JSObject
{
public:
static const Class class_;
};
bool
GetIterator(JSContext* cx, HandleObject obj, unsigned flags, MutableHandleObject objp);

@ -2584,6 +2584,7 @@ static const JSFunctionSpec string_methods[] = {
/* Perl-ish methods (search is actually Python-esque). */
JS_SELF_HOSTED_FN("match", "String_match", 1,0),
JS_SELF_HOSTED_FN("matchAll", "String_matchAll", 1,0),
JS_SELF_HOSTED_FN("search", "String_search", 1,0),
JS_SELF_HOSTED_FN("replace", "String_replace", 2,0),
JS_SELF_HOSTED_FN("split", "String_split", 2,0),

@ -282,6 +282,7 @@
macro(RegExpFlagsGetter, RegExpFlagsGetter, "RegExpFlagsGetter") \
macro(RegExpMatcher, RegExpMatcher, "RegExpMatcher") \
macro(RegExpSearcher, RegExpSearcher, "RegExpSearcher") \
macro(RegExpStringIterator, RegExpStringIterator, "RegExp String Iterator") \
macro(RegExpTester, RegExpTester, "RegExpTester") \
macro(RegExp_prototype_Exec, RegExp_prototype_Exec, "RegExp_prototype_Exec") \
macro(Reify, Reify, "Reify") \

@ -468,62 +468,29 @@ GlobalObject::initSelfHostingBuiltins(JSContext* cx, Handle<GlobalObject*> globa
return false;
}
RootedValue std_isConcatSpreadable(cx);
std_isConcatSpreadable.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::isConcatSpreadable));
if (!JS_DefineProperty(cx, global, "std_isConcatSpreadable", std_isConcatSpreadable,
JSPROP_PERMANENT | JSPROP_READONLY))
{
return false;
}
// Define a top-level property 'std_iterator' with the name of the method
// used by for-of loops to create an iterator.
RootedValue std_iterator(cx);
std_iterator.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::iterator));
if (!JS_DefineProperty(cx, global, "std_iterator", std_iterator,
JSPROP_PERMANENT | JSPROP_READONLY))
{
return false;
}
RootedValue std_match(cx);
std_match.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::match));
if (!JS_DefineProperty(cx, global, "std_match", std_match,
JSPROP_PERMANENT | JSPROP_READONLY))
{
return false;
}
RootedValue std_replace(cx);
std_replace.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::replace));
if (!JS_DefineProperty(cx, global, "std_replace", std_replace,
JSPROP_PERMANENT | JSPROP_READONLY))
{
return false;
}
RootedValue std_search(cx);
std_search.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::search));
if (!JS_DefineProperty(cx, global, "std_search", std_search,
JSPROP_PERMANENT | JSPROP_READONLY))
{
return false;
}
RootedValue std_species(cx);
std_species.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::species));
if (!JS_DefineProperty(cx, global, "std_species", std_species,
JSPROP_PERMANENT | JSPROP_READONLY))
{
return false;
}
RootedValue std_split(cx);
std_split.setSymbol(cx->wellKnownSymbols().get(JS::SymbolCode::split));
if (!JS_DefineProperty(cx, global, "std_split", std_split,
JSPROP_PERMANENT | JSPROP_READONLY))
{
return false;
struct SymbolAndName {
JS::SymbolCode code;
const char* name;
};
SymbolAndName wellKnownSymbols[] = {
{JS::SymbolCode::isConcatSpreadable, "std_isConcatSpreadable"},
{JS::SymbolCode::iterator, "std_iterator"},
{JS::SymbolCode::match, "std_match"},
{JS::SymbolCode::matchAll, "std_matchAll"},
{JS::SymbolCode::replace, "std_replace"},
{JS::SymbolCode::search, "std_search"},
{JS::SymbolCode::species, "std_species"},
{JS::SymbolCode::split, "std_split"},
};
RootedValue symVal(cx);
for (const auto& sym : wellKnownSymbols) {
symVal.setSymbol(cx->wellKnownSymbols().get(sym.code));
if (!JS_DefineProperty(cx, global, sym.name, symVal,
JSPROP_PERMANENT | JSPROP_READONLY)) {
return false;
}
}
return InitBareBuiltinCtor(cx, global, JSProto_Array) &&

@ -93,6 +93,7 @@ class GlobalObject : public NativeObject
ITERATOR_PROTO,
ARRAY_ITERATOR_PROTO,
STRING_ITERATOR_PROTO,
REGEXP_STRING_ITERATOR_PROTO,
LEGACY_GENERATOR_OBJECT_PROTO,
STAR_GENERATOR_OBJECT_PROTO,
STAR_GENERATOR_FUNCTION_PROTO,
@ -582,6 +583,12 @@ class GlobalObject : public NativeObject
initStringIteratorProto));
}
// Returns the RegExp String Iterator prototype stored in the global's
// REGEXP_STRING_ITERATOR_PROTO slot, passing initRegExpStringIteratorProto
// as the initializer for that slot.
static NativeObject*
getOrCreateRegExpStringIteratorPrototype(JSContext* cx, Handle<GlobalObject*> global) {
    auto proto = getOrCreateObject(cx, global, REGEXP_STRING_ITERATOR_PROTO,
                                   initRegExpStringIteratorProto);
    return MaybeNativeObject(proto);
}
static NativeObject*
getOrCreateLegacyGeneratorObjectPrototype(JSContext* cx, Handle<GlobalObject*> global) {
return MaybeNativeObject(getOrCreateObject(cx, global, LEGACY_GENERATOR_OBJECT_PROTO,
@ -767,6 +774,7 @@ class GlobalObject : public NativeObject
static bool initIteratorProto(JSContext* cx, Handle<GlobalObject*> global);
static bool initArrayIteratorProto(JSContext* cx, Handle<GlobalObject*> global);
static bool initStringIteratorProto(JSContext* cx, Handle<GlobalObject*> global);
static bool initRegExpStringIteratorProto(JSContext* cx, Handle<GlobalObject*> global);
// Implemented in vm/GeneratorObject.cpp.
static bool initLegacyGeneratorProto(JSContext* cx, Handle<GlobalObject*> global);

@ -856,6 +856,24 @@ intrinsic_NewStringIterator(JSContext* cx, unsigned argc, Value* vp)
return true;
}
// Self-hosting intrinsic NewRegExpStringIterator(): takes no arguments and
// returns a new RegExpStringIteratorObject whose prototype is the current
// global's RegExp String Iterator prototype. Returns false on failure.
static bool
intrinsic_NewRegExpStringIterator(JSContext* cx, unsigned argc, Value* vp)
{
    CallArgs args = CallArgsFromVp(argc, vp);
    MOZ_ASSERT(args.length() == 0);

    RootedObject iterProto(cx,
        GlobalObject::getOrCreateRegExpStringIteratorPrototype(cx, cx->global()));
    if (!iterProto)
        return false;

    if (JSObject* iterator = NewObjectWithGivenProto(cx, &RegExpStringIteratorObject::class_,
                                                     iterProto))
    {
        args.rval().setObject(*iterator);
        return true;
    }

    return false;
}
static bool
intrinsic_SetCanonicalName(JSContext* cx, unsigned argc, Value* vp)
{
@ -2288,6 +2306,8 @@ static const JSFunctionSpec intrinsic_functions[] = {
JS_INLINABLE_FN("GuardToStringIterator",
intrinsic_GuardToBuiltin<StringIteratorObject>, 1,0,
IntrinsicGuardToStringIterator),
JS_FN("GuardToRegExpStringIterator",
intrinsic_GuardToBuiltin<RegExpStringIteratorObject>, 1,0),
JS_FN("_CreateMapIterationResultPair", intrinsic_CreateMapIterationResultPair, 0, 0),
JS_INLINABLE_FN("_GetNextMapEntryForIterator", intrinsic_GetNextMapEntryForIterator, 2,0,
@ -2305,6 +2325,9 @@ static const JSFunctionSpec intrinsic_functions[] = {
JS_FN("NewStringIterator", intrinsic_NewStringIterator, 0,0),
JS_FN("CallStringIteratorMethodIfWrapped",
CallNonGenericSelfhostedMethod<Is<StringIteratorObject>>, 2,0),
JS_FN("NewRegExpStringIterator", intrinsic_NewRegExpStringIterator, 0,0),
JS_FN("CallRegExpStringIteratorMethodIfWrapped",
CallNonGenericSelfhostedMethod<Is<RegExpStringIteratorObject>>, 2,0),
JS_FN("IsStarGeneratorObject",
intrinsic_IsInstanceOfBuiltin<StarGeneratorObject>, 1,0),

Loading…
Cancel
Save