diff options
Diffstat (limited to 'js/src/irregexp/RegExpParser.cpp')
-rw-r--r-- | js/src/irregexp/RegExpParser.cpp | 130 |
1 files changed, 41 insertions, 89 deletions
diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp index 1ad044e8e2..d4308d0d84 100644 --- a/js/src/irregexp/RegExpParser.cpp +++ b/js/src/irregexp/RegExpParser.cpp @@ -227,7 +227,6 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, alloc(alloc), captures_(nullptr), next_pos_(chars), - captures_started_(0), end_(end), current_(kEndMarker), capture_count_(0), @@ -420,8 +419,7 @@ RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to) static inline RegExpTree* NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to) { - return alloc->newInfallible<RegExpLookaround>(RangeAtom(alloc, from, to), false, - 0, 0, RegExpLookaround::LOOKAHEAD); + return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0); } static bool @@ -1216,38 +1214,6 @@ RegExpParser<CharT>::ParseBackReferenceIndex(int* index_out) return true; } -template <typename CharT> -RegExpCapture* -RegExpParser<CharT>::GetCapture(int index) { - // The index for the capture groups are one-based. Its index in the list is - // zero-based. - int known_captures = - is_scanned_for_captures_ ? capture_count_ : captures_started_; - MOZ_ASSERT(index <= known_captures); - if (captures_ == NULL) { - captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc); - } - while ((int)captures_->length() < known_captures) { - RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(nullptr, captures_->length() + 1); - captures_->append(capture); - } - return (*captures_)[index - 1]; -} - - -template <typename CharT> -bool -RegExpParser<CharT>::RegExpParserState::IsInsideCaptureGroup(int index) { - for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { - if (s->group_type() != CAPTURE) continue; - // Return true if we found the matching capture index. - if (index == s->capture_index()) return true; - // Abort if index is larger than what has been parsed up till this state. - if (index > s->capture_index()) return false; - } - return false; -} - // QuantifierPrefix :: // { DecimalDigits } // { DecimalDigits , } @@ -1490,24 +1456,24 @@ RegExpTree* RegExpParser<CharT>::ParseDisjunction() { // Used to store current state while parsing subexpressions. - RegExpParserState initial_state(alloc, nullptr, INITIAL, RegExpLookaround::LOOKAHEAD, 0); - RegExpParserState* state = &initial_state; + RegExpParserState initial_state(alloc, nullptr, INITIAL, 0); + RegExpParserState* stored_state = &initial_state; // Cache the builder in a local variable for quick access. RegExpBuilder* builder = initial_state.builder(); while (true) { switch (current()) { case kEndMarker: - if (state->IsSubexpression()) { + if (stored_state->IsSubexpression()) { // Inside a parenthesized group when hitting end of input. return ReportError(JSMSG_MISSING_PAREN); } - MOZ_ASSERT(INITIAL == state->group_type()); + MOZ_ASSERT(INITIAL == stored_state->group_type()); // Parsing completed successfully. return builder->ToRegExp(); case ')': { - if (!state->IsSubexpression()) + if (!stored_state->IsSubexpression()) return ReportError(JSMSG_UNMATCHED_RIGHT_PAREN); - MOZ_ASSERT(INITIAL != state->group_type()); + MOZ_ASSERT(INITIAL != stored_state->group_type()); Advance(); // End disjunction parsing and convert builder content to new single @@ -1516,30 +1482,29 @@ RegExpParser<CharT>::ParseDisjunction() int end_capture_index = captures_started(); - int capture_index = state->capture_index(); - SubexpressionType group_type = state->group_type(); + int capture_index = stored_state->capture_index(); + SubexpressionType group_type = stored_state->group_type(); + + // Restore previous state. + stored_state = stored_state->previous_state(); + builder = stored_state->builder(); // Build result of subexpression. if (group_type == CAPTURE) { - RegExpCapture* capture = GetCapture(capture_index); - capture->set_body(body); + RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(body, capture_index); + (*captures_)[capture_index - 1] = capture; body = capture; } else if (group_type != GROUPING) { - MOZ_ASSERT(group_type == POSITIVE_LOOKAROUND || - group_type == NEGATIVE_LOOKAROUND); - bool is_positive = (group_type == POSITIVE_LOOKAROUND); - body = alloc->newInfallible<RegExpLookaround>(body, + MOZ_ASSERT(group_type == POSITIVE_LOOKAHEAD || + group_type == NEGATIVE_LOOKAHEAD); + bool is_positive = (group_type == POSITIVE_LOOKAHEAD); + body = alloc->newInfallible<RegExpLookahead>(body, is_positive, end_capture_index - capture_index, - capture_index, - state->lookaround_type()); + capture_index); } - - // Restore previous state. - state = state->previous_state(); - builder = state->builder(); builder->AddAtom(body); - if (unicode_ && (group_type == POSITIVE_LOOKAROUND || group_type == NEGATIVE_LOOKAROUND)) + if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD)) continue; // For compatability with JSC and ES3, we allow quantifiers after // lookaheads, and break in all cases. @@ -1599,7 +1564,6 @@ RegExpParser<CharT>::ParseDisjunction() } case '(': { SubexpressionType subexpr_type = CAPTURE; - RegExpLookaround::Type lookaround_type = state->lookaround_type(); Advance(); if (current() == '?') { switch (Next()) { @@ -1607,39 +1571,26 @@ RegExpParser<CharT>::ParseDisjunction() subexpr_type = GROUPING; break; case '=': - lookaround_type = RegExpLookaround::LOOKAHEAD; - subexpr_type = POSITIVE_LOOKAROUND; + subexpr_type = POSITIVE_LOOKAHEAD; break; case '!': - lookaround_type = RegExpLookaround::LOOKAHEAD; - subexpr_type = NEGATIVE_LOOKAROUND; + subexpr_type = NEGATIVE_LOOKAHEAD; break; - case '<': - Advance(); - lookaround_type = RegExpLookaround::LOOKBEHIND; - if (Next() == '=') { - subexpr_type = POSITIVE_LOOKAROUND; - break; - } else if (Next() == '!') { - subexpr_type = NEGATIVE_LOOKAROUND; - break; - } - // We didn't get a positive or negative after '<'. - // That's an error. - return ReportError(JSMSG_INVALID_GROUP); default: return ReportError(JSMSG_INVALID_GROUP); } Advance(2); } else { + if (captures_ == nullptr) + captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc); if (captures_started() >= kMaxCaptures) return ReportError(JSMSG_TOO_MANY_PARENS); - captures_started_++; + captures_->append((RegExpCapture*) nullptr); } // Store current state and begin new disjunction parsing. - state = alloc->newInfallible<RegExpParserState>(alloc, state, subexpr_type, - lookaround_type, captures_started_); - builder = state->builder(); + stored_state = alloc->newInfallible<RegExpParserState>(alloc, stored_state, subexpr_type, + captures_started()); + builder = stored_state->builder(); continue; } case '[': { @@ -1694,18 +1645,19 @@ RegExpParser<CharT>::ParseDisjunction() case '7': case '8': case '9': { int index = 0; if (ParseBackReferenceIndex(&index)) { - if (state->IsInsideCaptureGroup(index)) { - // The backreference is inside the capture group it refers to. - // Nothing can possibly have been captured yet. - builder->AddEmpty(); - } else { - RegExpCapture* capture = GetCapture(index); - RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture); - if (unicode_) - builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom)); - else - builder->AddAtom(atom); + RegExpCapture* capture = nullptr; + if (captures_ != nullptr && index <= (int) captures_->length()) { + capture = (*captures_)[index - 1]; + } + if (capture == nullptr) { + builder->AddEmpty(); + break; } + RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture); + if (unicode_) + builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom)); + else + builder->AddAtom(atom); break; } if (unicode_) |