diff options
author | Moonchild <moonchild@palemoon.org> | 2021-02-05 12:44:26 +0000 |
---|---|---|
committer | Moonchild <moonchild@palemoon.org> | 2021-02-05 12:44:26 +0000 |
commit | 66d028942c3246f17f3f86ce40c230041556d27d (patch) | |
tree | fd948b4a70ddb224b0e95d6a11f0398e8601715e | |
parent | bd9fdeccc7f731450188dce4d7ec21643306988c (diff) | |
download | uxp-66d028942c3246f17f3f86ce40c230041556d27d.tar.gz |
Issue #1732 - Implement JSON superset proposal.
This change allows the Unicode line and paragraph separators (U+2028 and U+2029)
in JS string literals, removing the only quirk of the JS string syntax that kept
JSON from being a valid subset of JS.
Resolves #1732
-rw-r--r-- | js/src/builtin/Eval.cpp | 48 | ||||
-rw-r--r-- | js/src/frontend/TokenStream.cpp | 15 | ||||
-rw-r--r-- | js/src/jit-test/tests/latin1/json.js | 11 | ||||
-rw-r--r-- | js/src/tests/ecma_5/eval/line-terminator-paragraph-terminator.js | 59 |
4 files changed, 44 insertions, 89 deletions
diff --git a/js/src/builtin/Eval.cpp b/js/src/builtin/Eval.cpp index 4ee7a35c85..53fa789313 100644 --- a/js/src/builtin/Eval.cpp +++ b/js/src/builtin/Eval.cpp @@ -141,35 +141,29 @@ template <typename CharT> static bool EvalStringMightBeJSON(const mozilla::Range<const CharT> chars) { - // If the eval string starts with '(' or '[' and ends with ')' or ']', it may be JSON. - // Try the JSON parser first because it's much faster. If the eval string - // isn't JSON, JSON parsing will probably fail quickly, so little time - // will be lost. + // If the eval string starts with '(' or '[' and ends with ')' or ']', it + // may be JSON. Try the JSON parser first because it's much faster. If + // the eval string isn't JSON, JSON parsing will probably fail quickly, so + // little time will be lost. size_t length = chars.length(); - if (length > 2 && - ((chars[0] == '[' && chars[length - 1] == ']') || - (chars[0] == '(' && chars[length - 1] == ')'))) - { - // Remarkably, JavaScript syntax is not a superset of JSON syntax: - // strings in JavaScript cannot contain the Unicode line and paragraph - // terminator characters U+2028 and U+2029, but strings in JSON can. - // Rather than force the JSON parser to handle this quirk when used by - // eval, we simply don't use the JSON parser when either character - // appears in the provided string. See bug 657367. - if (sizeof(CharT) > 1) { - for (RangedPtr<const CharT> cp = chars.begin() + 1, end = chars.end() - 1; - cp < end; - cp++) - { - char16_t c = *cp; - if (c == 0x2028 || c == 0x2029) - return false; - } - } + if (length < 2) + return false; - return true; - } - return false; + // It used to be that strings in JavaScript forbid U+2028 LINE SEPARATOR + // and U+2029 PARAGRAPH SEPARATOR, so something like + // + // eval("['" + "\u2028" + "']"); + // + // i.e. an array containing a string with a line separator in it, *would* + // be JSON but *would not* be valid JavaScript. 
Handing such a string to + // the JSON parser would then fail to recognize a syntax error. As of + // <https://tc39.github.io/proposal-json-superset/> JavaScript strings may + // contain these two code points, so it's safe to JSON-parse eval strings + // that contain them. + + CharT first = chars[0], last = chars[length - 1]; + return (first == '[' && last == ']') || + (first == '(' && last == ')'); } template <typename CharT> diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp index d65511a8cc..7a253cc0e9 100644 --- a/js/src/frontend/TokenStream.cpp +++ b/js/src/frontend/TokenStream.cpp @@ -603,6 +603,10 @@ TokenStream::TokenBuf::findEOLMax(size_t start, size_t max) if (n >= max) break; n++; + + // This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in + // string and template literals. These code points do affect line and + // column coordinates, even as they encode their literal values. if (TokenBuf::isRawEOLChar(*p++)) break; } @@ -2129,8 +2133,9 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) } break; } - } else if (TokenBuf::isRawEOLChar(c)) { + } else if (c == '\r' || c == '\n') { if (!parsingTemplate) { + // String literals don't allow ASCII line breaks. ungetCharIgnoreEOL(c); error(JSMSG_UNTERMINATED_STRING); return false; @@ -2138,10 +2143,18 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) if (c == '\r') { c = '\n'; if (userbuf.peekRawChar() == '\n') + // Treat CRLF as a single line break. skipCharsIgnoreEOL(1); } updateLineInfoForEOL(); updateFlagsForEOL(); + } else if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) { + // U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR encode + // their literal values in template literals and (as of the + // JSON superset proposal) string literals, but they still count + // as line terminators when computing line/column coordinates. 
+ updateLineInfoForEOL(); + updateFlagsForEOL(); } else if (parsingTemplate && c == '$') { if ((nc = getCharIgnoreEOL()) == '{') break; diff --git a/js/src/jit-test/tests/latin1/json.js b/js/src/jit-test/tests/latin1/json.js index 16559890d7..ab7610cd5a 100644 --- a/js/src/jit-test/tests/latin1/json.js +++ b/js/src/jit-test/tests/latin1/json.js @@ -55,13 +55,10 @@ function testEvalHackNotJSON() { arr = eval("[]; var z; [1, 2, 3, \"abc\u1200\"]"); assertEq(JSON.stringify(arr), '[1,2,3,"abc\u1200"]'); - try { - eval("[1, 2, 3, \"abc\u2028\"]"); - throw new Error("U+2028 shouldn't eval"); - } catch (e) { - assertEq(e instanceof SyntaxError, true, - "should have thrown a SyntaxError, instead got " + e); - } + // JSON superset + var arr = eval("[1, 2, 3, \"abc\u2028\"]"); + assertEq(arr.length, 4); + assertEq(arr[3], "abc\u2028"); } testEvalHackNotJSON(); diff --git a/js/src/tests/ecma_5/eval/line-terminator-paragraph-terminator.js b/js/src/tests/ecma_5/eval/line-terminator-paragraph-terminator.js index 61047fb109..9a904073ab 100644 --- a/js/src/tests/ecma_5/eval/line-terminator-paragraph-terminator.js +++ b/js/src/tests/ecma_5/eval/line-terminator-paragraph-terminator.js @@ -3,7 +3,9 @@ //----------------------------------------------------------------------------- var BUGNUMBER = 657367; -var summary = "eval must not parse strings containing U+2028 or U+2029"; +var summary = + "eval via the JSON parser should parse strings containing U+2028/U+2029 " + + "(as of <https://tc39.github.io/proposal-json-superset/>, that is)"; print(BUGNUMBER + ": " + summary); @@ -11,59 +13,8 @@ print(BUGNUMBER + ": " + summary); * BEGIN TEST * **************/ -function esc(s) -{ - return s.split("").map(function(v) - { - var code = - ("000" + v.charCodeAt(0).toString(16)).slice(-4); - return "\\u" + code; - }).join(""); -} - -try -{ - var r = eval('"\u2028"'); - throw new Error("\"\\u2028\" didn't throw, returned " + esc(r)); -} -catch (e) -{ - assertEq(e instanceof SyntaxError, true, - 
"U+2028 is not a valid string character"); -} - -try -{ - var r = eval('("\u2028")'); - throw new Error("(\"\\u2028\") didn't throw, returned " + esc(r)); -} -catch (e) -{ - assertEq(e instanceof SyntaxError, true, - "U+2028 is not a valid string character"); -} - -try -{ - var r = eval('"\u2029"'); - throw new Error("\"\\u2029\" didn't throw, returned " + esc(r)); -} -catch (e) -{ - assertEq(e instanceof SyntaxError, true, - "U+2029 is not a valid string character"); -} - -try -{ - var r = eval('("\u2029")'); - throw new Error("(\"\\u2029\") didn't throw, returned " + esc(r)); -} -catch (e) -{ - assertEq(e instanceof SyntaxError, true, - "U+2029 is not a valid string character"); -} +assertEq(eval('("\u2028")'), "\u2028"); +assertEq(eval('("\u2029")'), "\u2029"); /******************************************************************************/ |