summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Moonchild <moonchild@palemoon.org> 2021-02-05 12:44:26 +0000
committer: Moonchild <moonchild@palemoon.org> 2021-02-05 12:44:26 +0000
commit: 66d028942c3246f17f3f86ce40c230041556d27d (patch)
tree: fd948b4a70ddb224b0e95d6a11f0398e8601715e
parent: bd9fdeccc7f731450188dce4d7ec21643306988c (diff)
download: uxp-66d028942c3246f17f3f86ce40c230041556d27d.tar.gz
Issue #1732 - Implement JSON superset proposal.
This change allows the Unicode line and paragraph separators (U+2028 and U+2029) to appear unescaped in JS string literals, removing the only quirk of the JS string definition that kept JSON from being a valid subset of JS. Resolves #1732.
-rw-r--r-- js/src/builtin/Eval.cpp 48
-rw-r--r-- js/src/frontend/TokenStream.cpp 15
-rw-r--r-- js/src/jit-test/tests/latin1/json.js 11
-rw-r--r-- js/src/tests/ecma_5/eval/line-terminator-paragraph-terminator.js 59
4 files changed, 44 insertions, 89 deletions
diff --git a/js/src/builtin/Eval.cpp b/js/src/builtin/Eval.cpp
index 4ee7a35c85..53fa789313 100644
--- a/js/src/builtin/Eval.cpp
+++ b/js/src/builtin/Eval.cpp
@@ -141,35 +141,29 @@ template <typename CharT>
static bool
EvalStringMightBeJSON(const mozilla::Range<const CharT> chars)
{
- // If the eval string starts with '(' or '[' and ends with ')' or ']', it may be JSON.
- // Try the JSON parser first because it's much faster. If the eval string
- // isn't JSON, JSON parsing will probably fail quickly, so little time
- // will be lost.
+ // If the eval string starts with '(' or '[' and ends with ')' or ']', it
+ // may be JSON. Try the JSON parser first because it's much faster. If
+ // the eval string isn't JSON, JSON parsing will probably fail quickly, so
+ // little time will be lost.
size_t length = chars.length();
- if (length > 2 &&
- ((chars[0] == '[' && chars[length - 1] == ']') ||
- (chars[0] == '(' && chars[length - 1] == ')')))
- {
- // Remarkably, JavaScript syntax is not a superset of JSON syntax:
- // strings in JavaScript cannot contain the Unicode line and paragraph
- // terminator characters U+2028 and U+2029, but strings in JSON can.
- // Rather than force the JSON parser to handle this quirk when used by
- // eval, we simply don't use the JSON parser when either character
- // appears in the provided string. See bug 657367.
- if (sizeof(CharT) > 1) {
- for (RangedPtr<const CharT> cp = chars.begin() + 1, end = chars.end() - 1;
- cp < end;
- cp++)
- {
- char16_t c = *cp;
- if (c == 0x2028 || c == 0x2029)
- return false;
- }
- }
+ if (length < 2)
+ return false;
- return true;
- }
- return false;
+ // It used to be that strings in JavaScript forbade U+2028 LINE SEPARATOR
+ // and U+2029 PARAGRAPH SEPARATOR, so something like
+ //
+ // eval("['" + "\u2028" + "']");
+ //
+ // i.e. an array containing a string with a line separator in it, *would*
+ // be JSON but *would not* be valid JavaScript. Handing such a string to
+ // the JSON parser would then fail to recognize a syntax error. As of
+ // <https://tc39.github.io/proposal-json-superset/> JavaScript strings may
+ // contain these two code points, so it's safe to JSON-parse eval strings
+ // that contain them.
+
+ CharT first = chars[0], last = chars[length - 1];
+ return (first == '[' && last == ']') ||
+ (first == '(' && last == ')');
}
template <typename CharT>
diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp
index d65511a8cc..7a253cc0e9 100644
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -603,6 +603,10 @@ TokenStream::TokenBuf::findEOLMax(size_t start, size_t max)
if (n >= max)
break;
n++;
+
+ // This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
+ // string and template literals. These code points do affect line and
+ // column coordinates, even as they encode their literal values.
if (TokenBuf::isRawEOLChar(*p++))
break;
}
@@ -2129,8 +2133,9 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp)
}
break;
}
- } else if (TokenBuf::isRawEOLChar(c)) {
+ } else if (c == '\r' || c == '\n') {
if (!parsingTemplate) {
+ // String literals don't allow ASCII line breaks.
ungetCharIgnoreEOL(c);
error(JSMSG_UNTERMINATED_STRING);
return false;
@@ -2138,10 +2143,18 @@ TokenStream::getStringOrTemplateToken(int untilChar, Token** tp)
if (c == '\r') {
c = '\n';
if (userbuf.peekRawChar() == '\n')
+ // Treat CRLF as a single line break.
skipCharsIgnoreEOL(1);
}
updateLineInfoForEOL();
updateFlagsForEOL();
+ } else if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
+ // U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR encode
+ // their literal values in template literals and (as of the
+ // JSON superset proposal) string literals, but they still count
+ // as line terminators when computing line/column coordinates.
+ updateLineInfoForEOL();
+ updateFlagsForEOL();
} else if (parsingTemplate && c == '$') {
if ((nc = getCharIgnoreEOL()) == '{')
break;
diff --git a/js/src/jit-test/tests/latin1/json.js b/js/src/jit-test/tests/latin1/json.js
index 16559890d7..ab7610cd5a 100644
--- a/js/src/jit-test/tests/latin1/json.js
+++ b/js/src/jit-test/tests/latin1/json.js
@@ -55,13 +55,10 @@ function testEvalHackNotJSON() {
arr = eval("[]; var z; [1, 2, 3, \"abc\u1200\"]");
assertEq(JSON.stringify(arr), '[1,2,3,"abc\u1200"]');
- try {
- eval("[1, 2, 3, \"abc\u2028\"]");
- throw new Error("U+2028 shouldn't eval");
- } catch (e) {
- assertEq(e instanceof SyntaxError, true,
- "should have thrown a SyntaxError, instead got " + e);
- }
+ // JSON superset
+ var arr = eval("[1, 2, 3, \"abc\u2028\"]");
+ assertEq(arr.length, 4);
+ assertEq(arr[3], "abc\u2028");
}
testEvalHackNotJSON();
diff --git a/js/src/tests/ecma_5/eval/line-terminator-paragraph-terminator.js b/js/src/tests/ecma_5/eval/line-terminator-paragraph-terminator.js
index 61047fb109..9a904073ab 100644
--- a/js/src/tests/ecma_5/eval/line-terminator-paragraph-terminator.js
+++ b/js/src/tests/ecma_5/eval/line-terminator-paragraph-terminator.js
@@ -3,7 +3,9 @@
//-----------------------------------------------------------------------------
var BUGNUMBER = 657367;
-var summary = "eval must not parse strings containing U+2028 or U+2029";
+var summary =
+ "eval via the JSON parser should parse strings containing U+2028/U+2029 " +
+ "(as of <https://tc39.github.io/proposal-json-superset/>, that is)";
print(BUGNUMBER + ": " + summary);
@@ -11,59 +13,8 @@ print(BUGNUMBER + ": " + summary);
* BEGIN TEST *
**************/
-function esc(s)
-{
- return s.split("").map(function(v)
- {
- var code =
- ("000" + v.charCodeAt(0).toString(16)).slice(-4);
- return "\\u" + code;
- }).join("");
-}
-
-try
-{
- var r = eval('"\u2028"');
- throw new Error("\"\\u2028\" didn't throw, returned " + esc(r));
-}
-catch (e)
-{
- assertEq(e instanceof SyntaxError, true,
- "U+2028 is not a valid string character");
-}
-
-try
-{
- var r = eval('("\u2028")');
- throw new Error("(\"\\u2028\") didn't throw, returned " + esc(r));
-}
-catch (e)
-{
- assertEq(e instanceof SyntaxError, true,
- "U+2028 is not a valid string character");
-}
-
-try
-{
- var r = eval('"\u2029"');
- throw new Error("\"\\u2029\" didn't throw, returned " + esc(r));
-}
-catch (e)
-{
- assertEq(e instanceof SyntaxError, true,
- "U+2029 is not a valid string character");
-}
-
-try
-{
- var r = eval('("\u2029")');
- throw new Error("(\"\\u2029\") didn't throw, returned " + esc(r));
-}
-catch (e)
-{
- assertEq(e instanceof SyntaxError, true,
- "U+2029 is not a valid string character");
-}
+assertEq(eval('("\u2028")'), "\u2028");
+assertEq(eval('("\u2029")'), "\u2029");
/******************************************************************************/