diff options
Diffstat (limited to 'js/src/frontend/TokenStream.h')
-rw-r--r-- | js/src/frontend/TokenStream.h | 1057 |
1 files changed, 1057 insertions, 0 deletions
diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h new file mode 100644 index 0000000000..29dcead62e --- /dev/null +++ b/js/src/frontend/TokenStream.h @@ -0,0 +1,1057 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef frontend_TokenStream_h +#define frontend_TokenStream_h + +// JS lexical scanner interface. + +#include "mozilla/ArrayUtils.h" +#include "mozilla/Assertions.h" +#include "mozilla/Attributes.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/PodOperations.h" + +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> + +#include "jscntxt.h" +#include "jspubtd.h" + +#include "frontend/TokenKind.h" +#include "js/UniquePtr.h" +#include "js/Vector.h" +#include "vm/RegExpObject.h" + +struct KeywordInfo; + +namespace js { +namespace frontend { + +class AutoAwaitIsKeyword; + +struct TokenPos { + uint32_t begin; // Offset of the token's first char. + uint32_t end; // Offset of 1 past the token's last char. + + TokenPos() {} + TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {} + + // Return a TokenPos that covers left, right, and anything in between. + static TokenPos box(const TokenPos& left, const TokenPos& right) { + MOZ_ASSERT(left.begin <= left.end); + MOZ_ASSERT(left.end <= right.begin); + MOZ_ASSERT(right.begin <= right.end); + return TokenPos(left.begin, right.end); + } + + bool operator==(const TokenPos& bpos) const { + return begin == bpos.begin && end == bpos.end; + } + + bool operator!=(const TokenPos& bpos) const { + return begin != bpos.begin || end != bpos.end; + } + + bool operator <(const TokenPos& bpos) const { + return begin < bpos.begin; + } + + bool operator <=(const TokenPos& bpos) const { + return begin <= bpos.begin; + } + + bool operator >(const TokenPos& bpos) const { + return !(*this <= bpos); + } + + bool operator >=(const TokenPos& bpos) const { + return !(*this < bpos); + } + + bool encloses(const TokenPos& pos) const { + return begin <= pos.begin && pos.end <= end; + } +}; + +enum DecimalPoint { NoDecimal = false, HasDecimal = true }; + +class TokenStream; + +struct Token +{ + private: + // Sometimes the parser needs to inform the tokenizer to interpret + // subsequent text in a particular manner: for example, to tokenize a + // keyword as an identifier, not as the actual keyword, on the right-hand + // side of a dotted property access. Such information is communicated to + // the tokenizer as a Modifier when getting the next token. + // + // Ideally this definition would reside in TokenStream as that's the real + // user, but the debugging-use of it here causes a cyclic dependency (and + // C++ provides no way to forward-declare an enum inside a class). So + // define it here, then typedef it into TokenStream with static consts to + // bring the initializers into scope. + enum Modifier + { + // Normal operation. + None, + + // Looking for an operand, not an operator. In practice, this means + // that when '/' is seen, we look for a regexp instead of just returning + // TOK_DIV. + Operand, + + // Treat keywords as names by returning TOK_NAME. + KeywordIsName, + + // Treat subsequent characters as the tail of a template literal, after + // a template substitution, beginning with a "}", continuing with zero + // or more template literal characters, and ending with either "${" or + // the end of the template literal. For example: + // + // var entity = "world"; + // var s = `Hello ${entity}!`; + // ^ TemplateTail context + TemplateTail, + }; + enum ModifierException + { + NoException, + + // Used in following 2 cases: + // a) After |yield| we look for a token on the same line that starts an + // expression (Operand): |yield <expr>|. If no token is found, the + // |yield| stands alone, and the next token on a subsequent line must + // be: a comma continuing a comma expression, a semicolon terminating + // the statement that ended with |yield|, or the start of another + // statement (possibly an expression statement). The comma/semicolon + // cases are gotten as operators (None), contrasting with Operand + // earlier. + // b) After an arrow function with a block body in an expression + // statement, the next token must be: a colon in a conditional + // expression, a comma continuing a comma expression, a semicolon + // terminating the statement, or the token on a subsequent line that is + // the start of another statement (possibly an expression statement). + // Colon is gotten as operator (None), and it should only be gotten in + // conditional expression and missing it results in SyntaxError. + // Comma/semicolon cases are also gotten as operators (None), and 4th + // case is gotten after them. If no comma/semicolon found but EOL, + // the next token should be gotten as operand in 4th case (especially if + // '/' is the first character). So we should peek the token as + // operand before try getting colon/comma/semicolon. + // See also the comment in Parser::assignExpr(). + NoneIsOperand, + + // If a semicolon is inserted automatically, the next token is already + // gotten with None, but we expect Operand. + OperandIsNone, + + // If name of method definition is `get` or `set`, the next token is + // already gotten with KeywordIsName, but we expect None. + NoneIsKeywordIsName, + }; + friend class TokenStream; + + public: + TokenKind type; // char value or above enumerator + TokenPos pos; // token position in file + union { + private: + friend struct Token; + PropertyName* name; // non-numeric atom + JSAtom* atom; // potentially-numeric atom + struct { + double value; // floating point number + DecimalPoint decimalPoint; // literal contains '.' + } number; + RegExpFlag reflags; // regexp flags; use tokenbuf to access + // regexp chars + } u; +#ifdef DEBUG + Modifier modifier; // Modifier used to get this token + ModifierException modifierException; // Exception for this modifier +#endif + + // Mutators + + void setName(PropertyName* name) { + MOZ_ASSERT(type == TOK_NAME); + u.name = name; + } + + void setAtom(JSAtom* atom) { + MOZ_ASSERT(type == TOK_STRING || + type == TOK_TEMPLATE_HEAD || + type == TOK_NO_SUBS_TEMPLATE); + u.atom = atom; + } + + void setRegExpFlags(js::RegExpFlag flags) { + MOZ_ASSERT(type == TOK_REGEXP); + MOZ_ASSERT((flags & AllFlags) == flags); + u.reflags = flags; + } + + void setNumber(double n, DecimalPoint decimalPoint) { + MOZ_ASSERT(type == TOK_NUMBER); + u.number.value = n; + u.number.decimalPoint = decimalPoint; + } + + // Type-safe accessors + + PropertyName* name() const { + MOZ_ASSERT(type == TOK_NAME); + return u.name->JSAtom::asPropertyName(); // poor-man's type verification + } + + bool nameContainsEscape() const { + PropertyName* n = name(); + return pos.begin + n->length() != pos.end; + } + + JSAtom* atom() const { + MOZ_ASSERT(type == TOK_STRING || + type == TOK_TEMPLATE_HEAD || + type == TOK_NO_SUBS_TEMPLATE); + return u.atom; + } + + js::RegExpFlag regExpFlags() const { + MOZ_ASSERT(type == TOK_REGEXP); + MOZ_ASSERT((u.reflags & AllFlags) == u.reflags); + return u.reflags; + } + + double number() const { + MOZ_ASSERT(type == TOK_NUMBER); + return u.number.value; + } + + DecimalPoint decimalPoint() const { + MOZ_ASSERT(type == TOK_NUMBER); + return u.number.decimalPoint; + } +}; + +class CompileError : public JSErrorReport { +public: + void throwError(JSContext* cx); +}; + +// Ideally, tokenizing would be entirely independent of context. But the +// strict mode flag, which is in SharedContext, affects tokenizing, and +// TokenStream needs to see it. +// +// This class is a tiny back-channel from TokenStream to the strict mode flag +// that avoids exposing the rest of SharedContext to TokenStream. +// +class StrictModeGetter { + public: + virtual bool strictMode() = 0; +}; + +// TokenStream is the lexical scanner for Javascript source text. +// +// It takes a buffer of char16_t characters and linearly scans it into |Token|s. +// Internally the class uses a four element circular buffer |tokens| of +// |Token|s. As an index for |tokens|, the member |cursor| points to the +// current token. +// Calls to getToken() increase |cursor| by one and return the new current +// token. If a TokenStream was just created, the current token is initialized +// with random data (i.e. not initialized). It is therefore important that +// one of the first four member functions listed below is called first. +// The circular buffer lets us go back up to two tokens from the last +// scanned token. Internally, the relative number of backward steps that were +// taken (via ungetToken()) after the last token was scanned is stored in +// |lookahead|. +// +// The following table lists in which situations it is safe to call each listed +// function. No checks are made by the functions in non-debug builds. +// +// Function Name | Precondition; changes to |lookahead| +// ------------------+--------------------------------------------------------- +// getToken | none; if |lookahead > 0| then |lookahead--| +// peekToken | none; if |lookahead == 0| then |lookahead == 1| +// peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1| +// matchToken | none; if |lookahead > 0| and the match succeeds then +// | |lookahead--| +// consumeKnownToken | none; if |lookahead > 0| then |lookahead--| +// ungetToken | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++| +// +// The behavior of the token scanning process (see getTokenInternal()) can be +// modified by calling one of the first four above listed member functions with +// an optional argument of type Modifier. However, the modifier will be +// ignored unless |lookahead == 0| holds. Due to constraints of the grammar, +// this turns out not to be a problem in practice. See the +// mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?' +// for more details: +// https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E). +// +// The methods seek() and tell() allow to rescan from a previous visited +// location of the buffer. +// +class MOZ_STACK_CLASS TokenStream +{ + // Unicode separators that are treated as line terminators, in addition to \n, \r. + enum { + LINE_SEPARATOR = 0x2028, + PARA_SEPARATOR = 0x2029 + }; + + static const size_t ntokens = 4; // 1 current + 2 lookahead, rounded + // to power of 2 to avoid divmod by 3 + static const unsigned maxLookahead = 2; + static const unsigned ntokensMask = ntokens - 1; + + public: + typedef Vector<char16_t, 32> CharBuffer; + + TokenStream(ExclusiveContext* cx, const ReadOnlyCompileOptions& options, + const char16_t* base, size_t length, StrictModeGetter* smg); + + ~TokenStream(); + + MOZ_MUST_USE bool checkOptions(); + + // Accessors. + const Token& currentToken() const { return tokens[cursor]; } + bool isCurrentTokenType(TokenKind type) const { + return currentToken().type == type; + } + const CharBuffer& getTokenbuf() const { return tokenbuf; } + const char* getFilename() const { return filename; } + bool getMutedErrors() const { return mutedErrors; } + JSVersion versionNumber() const { return VersionNumber(options().version); } + JSVersion versionWithFlags() const { return options().version; } + + PropertyName* currentName() const { + if (isCurrentTokenType(TOK_YIELD)) + return cx->names().yield; + MOZ_ASSERT(isCurrentTokenType(TOK_NAME)); + return currentToken().name(); + } + + PropertyName* nextName() const { + if (nextToken().type == TOK_YIELD) + return cx->names().yield; + MOZ_ASSERT(nextToken().type == TOK_NAME); + return nextToken().name(); + } + + bool nextNameContainsEscape() const { + if (nextToken().type == TOK_YIELD) + return false; + MOZ_ASSERT(nextToken().type == TOK_NAME); + return nextToken().nameContainsEscape(); + } + + bool isCurrentTokenAssignment() const { + return TokenKindIsAssignment(currentToken().type); + } + + // Flag methods. + bool isEOF() const { return flags.isEOF; } + bool sawOctalEscape() const { return flags.sawOctalEscape; } + bool hadError() const { return flags.hadError; } + void clearSawOctalEscape() { flags.sawOctalEscape = false; } + + // TokenStream-specific error reporters. + bool reportError(unsigned errorNumber, ...); + bool reportErrorNoOffset(unsigned errorNumber, ...); + bool reportWarning(unsigned errorNumber, ...); + + static const uint32_t NoOffset = UINT32_MAX; + + // General-purpose error reporters. You should avoid calling these + // directly, and instead use the more succinct alternatives (e.g. + // reportError()) in TokenStream, Parser, and BytecodeEmitter. + bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber, + va_list args); + bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber, + va_list args); + bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, + va_list args); + + // asm.js reporter + void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...); + + JSAtom* getRawTemplateStringAtom() { + MOZ_ASSERT(currentToken().type == TOK_TEMPLATE_HEAD || + currentToken().type == TOK_NO_SUBS_TEMPLATE); + const char16_t* cur = userbuf.rawCharPtrAt(currentToken().pos.begin + 1); + const char16_t* end; + if (currentToken().type == TOK_TEMPLATE_HEAD) { + // Of the form |`...${| or |}...${| + end = userbuf.rawCharPtrAt(currentToken().pos.end - 2); + } else { + // NO_SUBS_TEMPLATE is of the form |`...`| or |}...`| + end = userbuf.rawCharPtrAt(currentToken().pos.end - 1); + } + + CharBuffer charbuf(cx); + while (cur < end) { + int32_t ch = *cur; + if (ch == '\r') { + ch = '\n'; + if ((cur + 1 < end) && (*(cur + 1) == '\n')) + cur++; + } + if (!charbuf.append(ch)) + return nullptr; + cur++; + } + return AtomizeChars(cx, charbuf.begin(), charbuf.length()); + } + + private: + // These are private because they should only be called by the tokenizer + // while tokenizing not by, for example, BytecodeEmitter. + bool reportStrictModeError(unsigned errorNumber, ...); + bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); } + + static JSAtom* atomize(ExclusiveContext* cx, CharBuffer& cb); + MOZ_MUST_USE bool putIdentInTokenbuf(const char16_t* identStart); + + struct Flags + { + bool isEOF:1; // Hit end of file. + bool isDirtyLine:1; // Non-whitespace since start of line. + bool sawOctalEscape:1; // Saw an octal character escape. + bool hadError:1; // Hit a syntax error, at start or during a + // token. + bool hitOOM:1; // Hit OOM. + + Flags() + : isEOF(), isDirtyLine(), sawOctalEscape(), hadError(), hitOOM() + {} + }; + + bool awaitIsKeyword = false; + friend class AutoAwaitIsKeyword; + + public: + typedef Token::Modifier Modifier; + static constexpr Modifier None = Token::None; + static constexpr Modifier Operand = Token::Operand; + static constexpr Modifier KeywordIsName = Token::KeywordIsName; + static constexpr Modifier TemplateTail = Token::TemplateTail; + + typedef Token::ModifierException ModifierException; + static constexpr ModifierException NoException = Token::NoException; + static constexpr ModifierException NoneIsOperand = Token::NoneIsOperand; + static constexpr ModifierException OperandIsNone = Token::OperandIsNone; + static constexpr ModifierException NoneIsKeywordIsName = Token::NoneIsKeywordIsName; + + void addModifierException(ModifierException modifierException) { +#ifdef DEBUG + const Token& next = nextToken(); + if (next.modifierException == NoneIsOperand) + { + // Token after yield expression without operand already has + // NoneIsOperand exception. + MOZ_ASSERT(modifierException == OperandIsNone); + MOZ_ASSERT(next.type != TOK_DIV, + "next token requires contextual specifier to be parsed unambiguously"); + + // Do not update modifierException. + return; + } + + MOZ_ASSERT(next.modifierException == NoException); + switch (modifierException) { + case NoneIsOperand: + MOZ_ASSERT(next.modifier == Operand); + MOZ_ASSERT(next.type != TOK_DIV, + "next token requires contextual specifier to be parsed unambiguously"); + break; + case OperandIsNone: + MOZ_ASSERT(next.modifier == None); + MOZ_ASSERT(next.type != TOK_DIV && next.type != TOK_REGEXP, + "next token requires contextual specifier to be parsed unambiguously"); + break; + case NoneIsKeywordIsName: + MOZ_ASSERT(next.modifier == KeywordIsName); + MOZ_ASSERT(next.type != TOK_NAME); + break; + default: + MOZ_CRASH("unexpected modifier exception"); + } + tokens[(cursor + 1) & ntokensMask].modifierException = modifierException; +#endif + } + + void + verifyConsistentModifier(Modifier modifier, Token lookaheadToken) { +#ifdef DEBUG + // Easy case: modifiers match. + if (modifier == lookaheadToken.modifier) + return; + + if (lookaheadToken.modifierException == OperandIsNone) { + // getToken(Operand) permissibly following getToken(). + if (modifier == Operand && lookaheadToken.modifier == None) + return; + } + + if (lookaheadToken.modifierException == NoneIsOperand) { + // getToken() permissibly following getToken(Operand). + if (modifier == None && lookaheadToken.modifier == Operand) + return; + } + + if (lookaheadToken.modifierException == NoneIsKeywordIsName) { + // getToken() permissibly following getToken(KeywordIsName). + if (modifier == None && lookaheadToken.modifier == KeywordIsName) + return; + } + + MOZ_ASSERT_UNREACHABLE("this token was previously looked up with a " + "different modifier, potentially making " + "tokenization non-deterministic"); +#endif + } + + // Advance to the next token. If the token stream encountered an error, + // return false. Otherwise return true and store the token kind in |*ttp|. + MOZ_MUST_USE bool getToken(TokenKind* ttp, Modifier modifier = None) { + // Check for a pushed-back token resulting from mismatching lookahead. + if (lookahead != 0) { + MOZ_ASSERT(!flags.hadError); + lookahead--; + cursor = (cursor + 1) & ntokensMask; + TokenKind tt = currentToken().type; + MOZ_ASSERT(tt != TOK_EOL); + verifyConsistentModifier(modifier, currentToken()); + *ttp = tt; + return true; + } + + return getTokenInternal(ttp, modifier); + } + + // Push the last scanned token back into the stream. + void ungetToken() { + MOZ_ASSERT(lookahead < maxLookahead); + lookahead++; + cursor = (cursor - 1) & ntokensMask; + } + + MOZ_MUST_USE bool peekToken(TokenKind* ttp, Modifier modifier = None) { + if (lookahead > 0) { + MOZ_ASSERT(!flags.hadError); + verifyConsistentModifier(modifier, nextToken()); + *ttp = nextToken().type; + return true; + } + if (!getTokenInternal(ttp, modifier)) + return false; + ungetToken(); + return true; + } + + MOZ_MUST_USE bool peekTokenPos(TokenPos* posp, Modifier modifier = None) { + if (lookahead == 0) { + TokenKind tt; + if (!getTokenInternal(&tt, modifier)) + return false; + ungetToken(); + MOZ_ASSERT(hasLookahead()); + } else { + MOZ_ASSERT(!flags.hadError); + verifyConsistentModifier(modifier, nextToken()); + } + *posp = nextToken().pos; + return true; + } + + // This is like peekToken(), with one exception: if there is an EOL + // between the end of the current token and the start of the next token, it + // return true and store TOK_EOL in |*ttp|. In that case, no token with + // TOK_EOL is actually created, just a TOK_EOL TokenKind is returned, and + // currentToken() shouldn't be consulted. (This is the only place TOK_EOL + // is produced.) + MOZ_ALWAYS_INLINE MOZ_MUST_USE bool + peekTokenSameLine(TokenKind* ttp, Modifier modifier = None) { + const Token& curr = currentToken(); + + // If lookahead != 0, we have scanned ahead at least one token, and + // |lineno| is the line that the furthest-scanned token ends on. If + // it's the same as the line that the current token ends on, that's a + // stronger condition than what we are looking for, and we don't need + // to return TOK_EOL. + if (lookahead != 0) { + bool onThisLine; + if (!srcCoords.isOnThisLine(curr.pos.end, lineno, &onThisLine)) + return reportError(JSMSG_OUT_OF_MEMORY); + if (onThisLine) { + MOZ_ASSERT(!flags.hadError); + verifyConsistentModifier(modifier, nextToken()); + *ttp = nextToken().type; + return true; + } + } + + // The above check misses two cases where we don't have to return + // TOK_EOL. + // - The next token starts on the same line, but is a multi-line token. + // - The next token starts on the same line, but lookahead==2 and there + // is a newline between the next token and the one after that. + // The following test is somewhat expensive but gets these cases (and + // all others) right. + TokenKind tmp; + if (!getToken(&tmp, modifier)) + return false; + const Token& next = currentToken(); + ungetToken(); + + *ttp = srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin) + ? next.type + : TOK_EOL; + return true; + } + + // Get the next token from the stream if its kind is |tt|. + MOZ_MUST_USE bool matchToken(bool* matchedp, TokenKind tt, Modifier modifier = None) { + TokenKind token; + if (!getToken(&token, modifier)) + return false; + if (token == tt) { + *matchedp = true; + } else { + ungetToken(); + *matchedp = false; + } + return true; + } + + void consumeKnownToken(TokenKind tt, Modifier modifier = None) { + bool matched; + MOZ_ASSERT(hasLookahead()); + MOZ_ALWAYS_TRUE(matchToken(&matched, tt, modifier)); + MOZ_ALWAYS_TRUE(matched); + } + + // Like matchToken(..., TOK_NAME) but further matching the name token only + // if it has the given characters, without containing escape sequences. + // If the name token has the given characters yet *does* contain an escape, + // a syntax error will be reported. + // + // This latter behavior makes this method unsuitable for use in any context + // where ASI might occur. In such places, an escaped "contextual keyword" + // on a new line is the start of an ExpressionStatement, not a continuation + // of a StatementListItem (or ImportDeclaration or ExportDeclaration, in + // modules). + MOZ_MUST_USE bool matchContextualKeyword(bool* matchedp, Handle<PropertyName*> keyword, + Modifier modifier = None) + { + TokenKind token; + if (!getToken(&token, modifier)) + return false; + if (token == TOK_NAME && currentToken().name() == keyword) { + if (currentToken().nameContainsEscape()) { + reportError(JSMSG_ESCAPED_KEYWORD); + return false; + } + + *matchedp = true; + } else { + *matchedp = false; + ungetToken(); + } + return true; + } + + MOZ_MUST_USE bool nextTokenEndsExpr(bool* endsExpr) { + TokenKind tt; + if (!peekToken(&tt)) + return false; + *endsExpr = isExprEnding[tt]; + return true; + } + + class MOZ_STACK_CLASS Position { + public: + // The Token fields may contain pointers to atoms, so for correct + // rooting we must ensure collection of atoms is disabled while objects + // of this class are live. Do this by requiring a dummy AutoKeepAtoms + // reference in the constructor. + // + // This class is explicity ignored by the analysis, so don't add any + // more pointers to GC things here! + explicit Position(AutoKeepAtoms&) { } + private: + Position(const Position&) = delete; + friend class TokenStream; + const char16_t* buf; + Flags flags; + unsigned lineno; + size_t linebase; + size_t prevLinebase; + Token currentToken; + unsigned lookahead; + Token lookaheadTokens[maxLookahead]; + }; + + MOZ_MUST_USE bool advance(size_t position); + void tell(Position*); + void seek(const Position& pos); + MOZ_MUST_USE bool seek(const Position& pos, const TokenStream& other); +#ifdef DEBUG + inline bool debugHasNoLookahead() const { + return lookahead == 0; + } +#endif + + const char16_t* rawCharPtrAt(size_t offset) const { + return userbuf.rawCharPtrAt(offset); + } + + const char16_t* rawLimit() const { + return userbuf.limit(); + } + + bool hasDisplayURL() const { + return displayURL_ != nullptr; + } + + char16_t* displayURL() { + return displayURL_.get(); + } + + bool hasSourceMapURL() const { + return sourceMapURL_ != nullptr; + } + + char16_t* sourceMapURL() { + return sourceMapURL_.get(); + } + + // If |atom| is not a keyword in this version, return true with *ttp + // unchanged. + // + // If it is a reserved word in this version and strictness mode, and thus + // can't be present in correct code, report a SyntaxError and return false. + // + // If it is a keyword, like "if", return true with the keyword's TokenKind + // in *ttp. + MOZ_MUST_USE bool checkForKeyword(JSAtom* atom, TokenKind* ttp); + + // Same semantics as above, but for the provided keyword. + MOZ_MUST_USE bool checkForKeyword(const KeywordInfo* kw, TokenKind* ttp); + + // This class maps a userbuf offset (which is 0-indexed) to a line number + // (which is 1-indexed) and a column index (which is 0-indexed). + class SourceCoords + { + // For a given buffer holding source code, |lineStartOffsets_| has one + // element per line of source code, plus one sentinel element. Each + // non-sentinel element holds the buffer offset for the start of the + // corresponding line of source code. For this example script: + // + // 1 // xyz [line starts at offset 0] + // 2 var x; [line starts at offset 7] + // 3 [line starts at offset 14] + // 4 var y; [line starts at offset 15] + // + // |lineStartOffsets_| is: + // + // [0, 7, 14, 15, MAX_PTR] + // + // To convert a "line number" to a "line index" (i.e. an index into + // |lineStartOffsets_|), subtract |initialLineNum_|. E.g. line 3's + // line index is (3 - initialLineNum_), which is 2. Therefore + // lineStartOffsets_[2] holds the buffer offset for the start of line 3, + // which is 14. (Note that |initialLineNum_| is often 1, but not + // always.) + // + // The first element is always 0, and the last element is always the + // MAX_PTR sentinel. + // + // offset-to-line/column lookups are O(log n) in the worst case (binary + // search), but in practice they're heavily clustered and we do better + // than that by using the previous lookup's result (lastLineIndex_) as + // a starting point. + // + // Checking if an offset lies within a particular line number + // (isOnThisLine()) is O(1). + // + Vector<uint32_t, 128> lineStartOffsets_; + uint32_t initialLineNum_; + + // This is mutable because it's modified on every search, but that fact + // isn't visible outside this class. + mutable uint32_t lastLineIndex_; + + uint32_t lineIndexOf(uint32_t offset) const; + + static const uint32_t MAX_PTR = UINT32_MAX; + + uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; } + uint32_t lineNumToIndex(uint32_t lineNum) const { return lineNum - initialLineNum_; } + + public: + SourceCoords(ExclusiveContext* cx, uint32_t ln); + + MOZ_MUST_USE bool add(uint32_t lineNum, uint32_t lineStartOffset); + MOZ_MUST_USE bool fill(const SourceCoords& other); + + bool isOnThisLine(uint32_t offset, uint32_t lineNum, bool* onThisLine) const { + uint32_t lineIndex = lineNumToIndex(lineNum); + if (lineIndex + 1 >= lineStartOffsets_.length()) // +1 due to sentinel + return false; + *onThisLine = lineStartOffsets_[lineIndex] <= offset && + offset < lineStartOffsets_[lineIndex + 1]; + return true; + } + + uint32_t lineNum(uint32_t offset) const; + uint32_t columnIndex(uint32_t offset) const; + void lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum, uint32_t* columnIndex) const; + }; + + SourceCoords srcCoords; + + JSAtomState& names() const { + return cx->names(); + } + + ExclusiveContext* context() const { + return cx; + } + + const ReadOnlyCompileOptions& options() const { + return options_; + } + + private: + // This is the low-level interface to the JS source code buffer. It just + // gets raw chars, basically. TokenStreams functions are layered on top + // and do some extra stuff like converting all EOL sequences to '\n', + // tracking the line number, and setting |flags.isEOF|. (The "raw" in "raw + // chars" refers to the lack of EOL sequence normalization.) + // + // buf[0..length-1] often represents a substring of some larger source, + // where we have only the substring in memory. The |startOffset| argument + // indicates the offset within this larger string at which our string + // begins, the offset of |buf[0]|. + class TokenBuf { + public: + TokenBuf(ExclusiveContext* cx, const char16_t* buf, size_t length, size_t startOffset) + : base_(buf), + startOffset_(startOffset), + limit_(buf + length), + ptr(buf) + { } + + bool hasRawChars() const { + return ptr < limit_; + } + + bool atStart() const { + return offset() == 0; + } + + size_t startOffset() const { + return startOffset_; + } + + size_t offset() const { + return startOffset_ + mozilla::PointerRangeSize(base_, ptr); + } + + const char16_t* rawCharPtrAt(size_t offset) const { + MOZ_ASSERT(startOffset_ <= offset); + MOZ_ASSERT(offset - startOffset_ <= mozilla::PointerRangeSize(base_, limit_)); + return base_ + (offset - startOffset_); + } + + const char16_t* limit() const { + return limit_; + } + + char16_t getRawChar() { + return *ptr++; // this will nullptr-crash if poisoned + } + + char16_t peekRawChar() const { + return *ptr; // this will nullptr-crash if poisoned + } + + bool matchRawChar(char16_t c) { + if (*ptr == c) { // this will nullptr-crash if poisoned + ptr++; + return true; + } + return false; + } + + bool matchRawCharBackwards(char16_t c) { + MOZ_ASSERT(ptr); // make sure it hasn't been poisoned + if (*(ptr - 1) == c) { + ptr--; + return true; + } + return false; + } + + void ungetRawChar() { + MOZ_ASSERT(ptr); // make sure it hasn't been poisoned + ptr--; + } + + const char16_t* addressOfNextRawChar(bool allowPoisoned = false) const { + MOZ_ASSERT_IF(!allowPoisoned, ptr); // make sure it hasn't been poisoned + return ptr; + } + + // Use this with caution! + void setAddressOfNextRawChar(const char16_t* a, bool allowPoisoned = false) { + MOZ_ASSERT_IF(!allowPoisoned, a); + ptr = a; + } + +#ifdef DEBUG + // Poison the TokenBuf so it cannot be accessed again. + void poison() { + ptr = nullptr; + } +#endif + + static bool isRawEOLChar(int32_t c) { + return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR; + } + + // Returns the offset of the next EOL, but stops once 'max' characters + // have been scanned (*including* the char at startOffset_). + size_t findEOLMax(size_t start, size_t max); + + private: + const char16_t* base_; // base of buffer + uint32_t startOffset_; // offset of base_[0] + const char16_t* limit_; // limit for quick bounds check + const char16_t* ptr; // next char to get + }; + + MOZ_MUST_USE bool getTokenInternal(TokenKind* ttp, Modifier modifier); + + MOZ_MUST_USE bool getBracedUnicode(uint32_t* code); + MOZ_MUST_USE bool getStringOrTemplateToken(int untilChar, Token** tp); + + int32_t getChar(); + int32_t getCharIgnoreEOL(); + void ungetChar(int32_t c); + void ungetCharIgnoreEOL(int32_t c); + Token* newToken(ptrdiff_t adjust); + uint32_t peekUnicodeEscape(uint32_t* codePoint); + uint32_t peekExtendedUnicodeEscape(uint32_t* codePoint); + uint32_t matchUnicodeEscapeIdStart(uint32_t* codePoint); + bool matchUnicodeEscapeIdent(uint32_t* codePoint); + bool peekChars(int n, char16_t* cp); + + MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated); + MOZ_MUST_USE bool getDirective(bool isMultiline, bool shouldWarnDeprecated, + const char* directive, int directiveLength, + const char* errorMsgPragma, + UniquePtr<char16_t[], JS::FreePolicy>* destination); + MOZ_MUST_USE bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated); + MOZ_MUST_USE bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated); + + // |expect| cannot be an EOL char. + bool matchChar(int32_t expect) { + MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect)); + return MOZ_LIKELY(userbuf.hasRawChars()) && + userbuf.matchRawChar(expect); + } + + void consumeKnownChar(int32_t expect) { + mozilla::DebugOnly<int32_t> c = getChar(); + MOZ_ASSERT(c == expect); + } + + int32_t peekChar() { + int32_t c = getChar(); + ungetChar(c); + return c; + } + + void skipChars(int n) { + while (--n >= 0) + getChar(); + } + + void skipCharsIgnoreEOL(int n) { + while (--n >= 0) + getCharIgnoreEOL(); + } + + void updateLineInfoForEOL(); + void updateFlagsForEOL(); + + const Token& nextToken() const { + MOZ_ASSERT(hasLookahead()); + return tokens[(cursor + 1) & ntokensMask]; + } + + bool hasLookahead() const { return lookahead > 0; } + + // Options used for parsing/tokenizing. + const ReadOnlyCompileOptions& options_; + + Token tokens[ntokens]; // circular token buffer + unsigned cursor; // index of last parsed token + unsigned lookahead; // count of lookahead tokens + unsigned lineno; // current line number + Flags flags; // flags -- see above + size_t linebase; // start of current line + size_t prevLinebase; // start of previous line; size_t(-1) if on the first line + TokenBuf userbuf; // user input buffer + const char* filename; // input filename or null + UniqueTwoByteChars displayURL_; // the user's requested source URL or null + UniqueTwoByteChars sourceMapURL_; // source map's filename or null + CharBuffer tokenbuf; // current token string buffer + uint8_t isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs? + ExclusiveContext* const cx; + bool mutedErrors; + StrictModeGetter* strictModeGetter; // used to test for strict mode +}; + +class MOZ_STACK_CLASS AutoAwaitIsKeyword +{ +private: + TokenStream* ts_; + bool oldAwaitIsKeyword_; + +public: + AutoAwaitIsKeyword(TokenStream* ts, bool awaitIsKeyword) { + ts_ = ts; + oldAwaitIsKeyword_ = ts_->awaitIsKeyword; + ts_->awaitIsKeyword = awaitIsKeyword; + } + + ~AutoAwaitIsKeyword() { + ts_->awaitIsKeyword = oldAwaitIsKeyword_; + ts_ = nullptr; + } +}; + +extern const char* +TokenKindToDesc(TokenKind tt); + +} // namespace frontend +} // namespace js + +extern JS_FRIEND_API(int) +js_fgets(char* buf, int size, FILE* file); + +#ifdef DEBUG +extern const char* +TokenKindToString(js::frontend::TokenKind tt); +#endif + +#endif /* frontend_TokenStream_h */ |