summaryrefslogtreecommitdiff
path: root/js/src
diff options
context:
space:
mode:
authorwolfbeast <mcwerewolf@gmail.com>2018-03-18 17:57:11 +0100
committerwolfbeast <mcwerewolf@gmail.com>2018-03-18 17:58:37 +0100
commitb2331d76221b67bffb5cb5a2344c8b150a305dc7 (patch)
tree5d9cae4cdfc502b568f436fd66d77e0b16476395 /js/src
parent122938a4398ae8db07060dca3561ff46f93b5925 (diff)
parent427e7346629c76a1676a3f92320c3d2575d01b85 (diff)
downloaduxp-b2331d76221b67bffb5cb5a2344c8b150a305dc7.tar.gz
Correctly tokenize valid JS names when using code points outside of BMP range.
This resolves #72. Merged remote-tracking branch 'janek/js_variable_unicode_1'
Diffstat (limited to 'js/src')
-rw-r--r--js/src/frontend/TokenStream.cpp149
-rw-r--r--js/src/frontend/TokenStream.h1
-rw-r--r--js/src/tests/ecma_6/Syntax/identifiers-with-extended-unicode-escape.js18
-rw-r--r--js/src/vm/Unicode.cpp879
-rw-r--r--js/src/vm/Unicode.h28
-rw-r--r--js/src/vm/UnicodeNonBMP.h10
-rwxr-xr-xjs/src/vm/make_unicode.py111
7 files changed, 1159 insertions, 37 deletions
diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp
index c166ed4145..179a7c2447 100644
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -118,13 +118,57 @@ IsIdentifier(const CharT* chars, size_t length)
return true;
}
+static uint32_t
+GetSingleCodePoint(const char16_t** p, const char16_t* end)
+{
+ uint32_t codePoint;
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(**p)) && *p + 1 < end) {
+ char16_t lead = **p;
+ char16_t maybeTrail = *(*p + 1);
+ if (unicode::IsTrailSurrogate(maybeTrail)) {
+ *p += 2;
+ return unicode::UTF16Decode(lead, maybeTrail);
+ }
+ }
+
+ codePoint = **p;
+ (*p)++;
+ return codePoint;
+}
+
+static bool
+IsIdentifierMaybeNonBMP(const char16_t* chars, size_t length)
+{
+ if (IsIdentifier(chars, length))
+ return true;
+
+ if (length == 0)
+ return false;
+
+ const char16_t* p = chars;
+ const char16_t* end = chars + length;
+ uint32_t codePoint;
+
+ codePoint = GetSingleCodePoint(&p, end);
+ if (!unicode::IsIdentifierStart(codePoint))
+ return false;
+
+ while (p < end) {
+ codePoint = GetSingleCodePoint(&p, end);
+ if (!unicode::IsIdentifierPart(codePoint))
+ return false;
+ }
+
+ return true;
+}
+
bool
frontend::IsIdentifier(JSLinearString* str)
{
JS::AutoCheckCannotGC nogc;
return str->hasLatin1Chars()
? ::IsIdentifier(str->latin1Chars(nogc), str->length())
- : ::IsIdentifier(str->twoByteChars(nogc), str->length());
+ : ::IsIdentifierMaybeNonBMP(str->twoByteChars(nogc), str->length());
}
bool
@@ -993,6 +1037,21 @@ IsTokenSane(Token* tp)
#endif
bool
+TokenStream::matchTrailForLeadSurrogate(char16_t lead, char16_t* trail, uint32_t* codePoint)
+{
+ int32_t maybeTrail = getCharIgnoreEOL();
+ if (!unicode::IsTrailSurrogate(maybeTrail)) {
+ ungetCharIgnoreEOL(maybeTrail);
+ return false;
+ }
+
+ if (trail)
+ *trail = maybeTrail;
+ *codePoint = unicode::UTF16Decode(lead, maybeTrail);
+ return true;
+}
+
+bool
TokenStream::putIdentInTokenbuf(const char16_t* identStart)
{
int32_t c;
@@ -1003,11 +1062,39 @@ TokenStream::putIdentInTokenbuf(const char16_t* identStart)
tokenbuf.clear();
for (;;) {
c = getCharIgnoreEOL();
+
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+ char16_t trail;
+ uint32_t codePoint;
+ if (matchTrailForLeadSurrogate(c, &trail, &codePoint)) {
+ if (!unicode::IsIdentifierPart(codePoint))
+ break;
+
+ if (!tokenbuf.append(c) || !tokenbuf.append(trail)) {
+ userbuf.setAddressOfNextRawChar(tmp);
+ return false;
+ }
+ continue;
+ }
+ }
+
if (!unicode::IsIdentifierPart(char16_t(c))) {
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
break;
+
+ if (MOZ_UNLIKELY(unicode::IsSupplementary(qc))) {
+ char16_t lead, trail;
+ unicode::UTF16Encode(qc, &lead, &trail);
+ if (!tokenbuf.append(lead) || !tokenbuf.append(trail)) {
+ userbuf.setAddressOfNextRawChar(tmp);
+ return false;
+ }
+ continue;
+ }
+
c = qc;
}
+
if (!tokenbuf.append(c)) {
userbuf.setAddressOfNextRawChar(tmp);
return false;
@@ -1168,12 +1255,23 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
static_assert('_' < 128,
"IdentifierStart contains '_', but as !IsUnicodeIDStart('_'), "
"ensure that '_' is never handled here");
- if (unicode::IsUnicodeIDStart(c)) {
+ if (unicode::IsUnicodeIDStart(char16_t(c))) {
identStart = userbuf.addressOfNextRawChar() - 1;
hadUnicodeEscape = false;
goto identifier;
}
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+ uint32_t codePoint;
+ if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) &&
+ unicode::IsUnicodeIDStart(codePoint))
+ {
+ identStart = userbuf.addressOfNextRawChar() - 2;
+ hadUnicodeEscape = false;
+ goto identifier;
+ }
+ }
+
goto badchar;
}
@@ -1224,6 +1322,17 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
c = getCharIgnoreEOL();
if (c == EOF)
break;
+
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+ uint32_t codePoint;
+ if (matchTrailForLeadSurrogate(c, nullptr, &codePoint)) {
+ if (!unicode::IsIdentifierPart(codePoint))
+ break;
+
+ continue;
+ }
+ }
+
if (!unicode::IsIdentifierPart(char16_t(c))) {
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
break;
@@ -1318,9 +1427,21 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
}
ungetCharIgnoreEOL(c);
- if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
- reportError(JSMSG_IDSTART_AFTER_NUMBER);
- goto error;
+ if (c != EOF) {
+ if (unicode::IsIdentifierStart(char16_t(c))) {
+ reportError(JSMSG_IDSTART_AFTER_NUMBER);
+ goto error;
+ }
+
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+ uint32_t codePoint;
+ if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) &&
+ unicode::IsIdentifierStart(codePoint))
+ {
+ reportError(JSMSG_IDSTART_AFTER_NUMBER);
+ goto error;
+ }
+ }
}
// Unlike identifiers and strings, numbers cannot contain escaped
@@ -1425,9 +1546,21 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
}
ungetCharIgnoreEOL(c);
- if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
- reportError(JSMSG_IDSTART_AFTER_NUMBER);
- goto error;
+ if (c != EOF) {
+ if (unicode::IsIdentifierStart(char16_t(c))) {
+ reportError(JSMSG_IDSTART_AFTER_NUMBER);
+ goto error;
+ }
+
+ if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+ uint32_t codePoint;
+ if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) &&
+ unicode::IsIdentifierStart(codePoint))
+ {
+ reportError(JSMSG_IDSTART_AFTER_NUMBER);
+ goto error;
+ }
+ }
}
double dval;
diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h
index 29dcead62e..5d6b4b795a 100644
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -952,6 +952,7 @@ class MOZ_STACK_CLASS TokenStream
uint32_t peekExtendedUnicodeEscape(uint32_t* codePoint);
uint32_t matchUnicodeEscapeIdStart(uint32_t* codePoint);
bool matchUnicodeEscapeIdent(uint32_t* codePoint);
+ bool matchTrailForLeadSurrogate(char16_t lead, char16_t* trail, uint32_t* codePoint);
bool peekChars(int n, char16_t* cp);
MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
diff --git a/js/src/tests/ecma_6/Syntax/identifiers-with-extended-unicode-escape.js b/js/src/tests/ecma_6/Syntax/identifiers-with-extended-unicode-escape.js
index 8e0a05fb50..e4b5f46029 100644
--- a/js/src/tests/ecma_6/Syntax/identifiers-with-extended-unicode-escape.js
+++ b/js/src/tests/ecma_6/Syntax/identifiers-with-extended-unicode-escape.js
@@ -98,7 +98,7 @@ const otherIdContinue = [
0x19DA, // NEW TAI LUE THAM DIGIT ONE, Gc=No
];
-for (let ident of [...idStart, ...otherIdStart]) {
+for (let ident of [...idStart, ...otherIdStart, ...idStartSupplemental]) {
for (let count of leadingZeros) {
let zeros = "0".repeat(count);
eval(`
@@ -108,20 +108,13 @@ for (let ident of [...idStart, ...otherIdStart]) {
}
}
-// Move this to the loop above when Bug 1197230 is fixed.
-for (let ident of [...idStartSupplemental]) {
- for (let zeros of leadingZeros) {
- assertThrowsInstanceOf(() => eval(`\\u{${zeros}${ident.toString(16)}}`), SyntaxError);
- }
-}
-
for (let ident of [...idContinue, ...idContinueSupplemental, ...otherIdContinue]) {
for (let zeros of leadingZeros) {
assertThrowsInstanceOf(() => eval(`\\u{${zeros}${ident.toString(16)}}`), SyntaxError);
}
}
-for (let ident of [...idStart, ...otherIdStart, ...idContinue, ...otherIdContinue]) {
+for (let ident of [...idStart, ...otherIdStart, ...idContinue, ...otherIdContinue, ...idStartSupplemental, ...idContinueSupplemental]) {
for (let zeros of leadingZeros) {
eval(`
let A\\u{${zeros}${ident.toString(16)}} = 123;
@@ -130,13 +123,6 @@ for (let ident of [...idStart, ...otherIdStart, ...idContinue, ...otherIdContinu
}
}
-// Move this to the loop above when Bug 1197230 is fixed.
-for (let ident of [...idStartSupplemental, ...idContinueSupplemental]) {
- for (let zeros of leadingZeros) {
- assertThrowsInstanceOf(() => eval(`\\u{${zeros}${ident.toString(16)}}`), SyntaxError);
- }
-}
-
const notIdentifiers = [
0x0000, // NULL, Gc=Cc
diff --git a/js/src/vm/Unicode.cpp b/js/src/vm/Unicode.cpp
index f4acf8f31f..82541c2312 100644
--- a/js/src/vm/Unicode.cpp
+++ b/js/src/vm/Unicode.cpp
@@ -1748,3 +1748,882 @@ const uint8_t unicode::folding_index2[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
+bool
+js::unicode::IsIdentifierStartNonBMP(uint32_t codePoint)
+{
+ if (codePoint >= 0x10000 && codePoint <= 0x1000b)
+ return true;
+ if (codePoint >= 0x1000d && codePoint <= 0x10026)
+ return true;
+ if (codePoint >= 0x10028 && codePoint <= 0x1003a)
+ return true;
+ if (codePoint >= 0x1003c && codePoint <= 0x1003d)
+ return true;
+ if (codePoint >= 0x1003f && codePoint <= 0x1004d)
+ return true;
+ if (codePoint >= 0x10050 && codePoint <= 0x1005d)
+ return true;
+ if (codePoint >= 0x10080 && codePoint <= 0x100fa)
+ return true;
+ if (codePoint >= 0x10140 && codePoint <= 0x10174)
+ return true;
+ if (codePoint >= 0x10280 && codePoint <= 0x1029c)
+ return true;
+ if (codePoint >= 0x102a0 && codePoint <= 0x102d0)
+ return true;
+ if (codePoint >= 0x10300 && codePoint <= 0x1031f)
+ return true;
+ if (codePoint >= 0x10330 && codePoint <= 0x1034a)
+ return true;
+ if (codePoint >= 0x10350 && codePoint <= 0x10375)
+ return true;
+ if (codePoint >= 0x10380 && codePoint <= 0x1039d)
+ return true;
+ if (codePoint >= 0x103a0 && codePoint <= 0x103c3)
+ return true;
+ if (codePoint >= 0x103c8 && codePoint <= 0x103cf)
+ return true;
+ if (codePoint >= 0x103d1 && codePoint <= 0x103d5)
+ return true;
+ if (codePoint >= 0x10400 && codePoint <= 0x1049d)
+ return true;
+ if (codePoint >= 0x104b0 && codePoint <= 0x104d3)
+ return true;
+ if (codePoint >= 0x104d8 && codePoint <= 0x104fb)
+ return true;
+ if (codePoint >= 0x10500 && codePoint <= 0x10527)
+ return true;
+ if (codePoint >= 0x10530 && codePoint <= 0x10563)
+ return true;
+ if (codePoint >= 0x10600 && codePoint <= 0x10736)
+ return true;
+ if (codePoint >= 0x10740 && codePoint <= 0x10755)
+ return true;
+ if (codePoint >= 0x10760 && codePoint <= 0x10767)
+ return true;
+ if (codePoint >= 0x10800 && codePoint <= 0x10805)
+ return true;
+ if (codePoint >= 0x10808 && codePoint <= 0x10808)
+ return true;
+ if (codePoint >= 0x1080a && codePoint <= 0x10835)
+ return true;
+ if (codePoint >= 0x10837 && codePoint <= 0x10838)
+ return true;
+ if (codePoint >= 0x1083c && codePoint <= 0x1083c)
+ return true;
+ if (codePoint >= 0x1083f && codePoint <= 0x10855)
+ return true;
+ if (codePoint >= 0x10860 && codePoint <= 0x10876)
+ return true;
+ if (codePoint >= 0x10880 && codePoint <= 0x1089e)
+ return true;
+ if (codePoint >= 0x108e0 && codePoint <= 0x108f2)
+ return true;
+ if (codePoint >= 0x108f4 && codePoint <= 0x108f5)
+ return true;
+ if (codePoint >= 0x10900 && codePoint <= 0x10915)
+ return true;
+ if (codePoint >= 0x10920 && codePoint <= 0x10939)
+ return true;
+ if (codePoint >= 0x10980 && codePoint <= 0x109b7)
+ return true;
+ if (codePoint >= 0x109be && codePoint <= 0x109bf)
+ return true;
+ if (codePoint >= 0x10a00 && codePoint <= 0x10a00)
+ return true;
+ if (codePoint >= 0x10a10 && codePoint <= 0x10a13)
+ return true;
+ if (codePoint >= 0x10a15 && codePoint <= 0x10a17)
+ return true;
+ if (codePoint >= 0x10a19 && codePoint <= 0x10a33)
+ return true;
+ if (codePoint >= 0x10a60 && codePoint <= 0x10a7c)
+ return true;
+ if (codePoint >= 0x10a80 && codePoint <= 0x10a9c)
+ return true;
+ if (codePoint >= 0x10ac0 && codePoint <= 0x10ac7)
+ return true;
+ if (codePoint >= 0x10ac9 && codePoint <= 0x10ae4)
+ return true;
+ if (codePoint >= 0x10b00 && codePoint <= 0x10b35)
+ return true;
+ if (codePoint >= 0x10b40 && codePoint <= 0x10b55)
+ return true;
+ if (codePoint >= 0x10b60 && codePoint <= 0x10b72)
+ return true;
+ if (codePoint >= 0x10b80 && codePoint <= 0x10b91)
+ return true;
+ if (codePoint >= 0x10c00 && codePoint <= 0x10c48)
+ return true;
+ if (codePoint >= 0x10c80 && codePoint <= 0x10cb2)
+ return true;
+ if (codePoint >= 0x10cc0 && codePoint <= 0x10cf2)
+ return true;
+ if (codePoint >= 0x11003 && codePoint <= 0x11037)
+ return true;
+ if (codePoint >= 0x11083 && codePoint <= 0x110af)
+ return true;
+ if (codePoint >= 0x110d0 && codePoint <= 0x110e8)
+ return true;
+ if (codePoint >= 0x11103 && codePoint <= 0x11126)
+ return true;
+ if (codePoint >= 0x11150 && codePoint <= 0x11172)
+ return true;
+ if (codePoint >= 0x11176 && codePoint <= 0x11176)
+ return true;
+ if (codePoint >= 0x11183 && codePoint <= 0x111b2)
+ return true;
+ if (codePoint >= 0x111c1 && codePoint <= 0x111c4)
+ return true;
+ if (codePoint >= 0x111da && codePoint <= 0x111da)
+ return true;
+ if (codePoint >= 0x111dc && codePoint <= 0x111dc)
+ return true;
+ if (codePoint >= 0x11200 && codePoint <= 0x11211)
+ return true;
+ if (codePoint >= 0x11213 && codePoint <= 0x1122b)
+ return true;
+ if (codePoint >= 0x11280 && codePoint <= 0x11286)
+ return true;
+ if (codePoint >= 0x11288 && codePoint <= 0x11288)
+ return true;
+ if (codePoint >= 0x1128a && codePoint <= 0x1128d)
+ return true;
+ if (codePoint >= 0x1128f && codePoint <= 0x1129d)
+ return true;
+ if (codePoint >= 0x1129f && codePoint <= 0x112a8)
+ return true;
+ if (codePoint >= 0x112b0 && codePoint <= 0x112de)
+ return true;
+ if (codePoint >= 0x11305 && codePoint <= 0x1130c)
+ return true;
+ if (codePoint >= 0x1130f && codePoint <= 0x11310)
+ return true;
+ if (codePoint >= 0x11313 && codePoint <= 0x11328)
+ return true;
+ if (codePoint >= 0x1132a && codePoint <= 0x11330)
+ return true;
+ if (codePoint >= 0x11332 && codePoint <= 0x11333)
+ return true;
+ if (codePoint >= 0x11335 && codePoint <= 0x11339)
+ return true;
+ if (codePoint >= 0x1133d && codePoint <= 0x1133d)
+ return true;
+ if (codePoint >= 0x11350 && codePoint <= 0x11350)
+ return true;
+ if (codePoint >= 0x1135d && codePoint <= 0x11361)
+ return true;
+ if (codePoint >= 0x11400 && codePoint <= 0x11434)
+ return true;
+ if (codePoint >= 0x11447 && codePoint <= 0x1144a)
+ return true;
+ if (codePoint >= 0x11480 && codePoint <= 0x114af)
+ return true;
+ if (codePoint >= 0x114c4 && codePoint <= 0x114c5)
+ return true;
+ if (codePoint >= 0x114c7 && codePoint <= 0x114c7)
+ return true;
+ if (codePoint >= 0x11580 && codePoint <= 0x115ae)
+ return true;
+ if (codePoint >= 0x115d8 && codePoint <= 0x115db)
+ return true;
+ if (codePoint >= 0x11600 && codePoint <= 0x1162f)
+ return true;
+ if (codePoint >= 0x11644 && codePoint <= 0x11644)
+ return true;
+ if (codePoint >= 0x11680 && codePoint <= 0x116aa)
+ return true;
+ if (codePoint >= 0x11700 && codePoint <= 0x11719)
+ return true;
+ if (codePoint >= 0x118a0 && codePoint <= 0x118df)
+ return true;
+ if (codePoint >= 0x118ff && codePoint <= 0x118ff)
+ return true;
+ if (codePoint >= 0x11ac0 && codePoint <= 0x11af8)
+ return true;
+ if (codePoint >= 0x11c00 && codePoint <= 0x11c08)
+ return true;
+ if (codePoint >= 0x11c0a && codePoint <= 0x11c2e)
+ return true;
+ if (codePoint >= 0x11c40 && codePoint <= 0x11c40)
+ return true;
+ if (codePoint >= 0x11c72 && codePoint <= 0x11c8f)
+ return true;
+ if (codePoint >= 0x12000 && codePoint <= 0x12399)
+ return true;
+ if (codePoint >= 0x12400 && codePoint <= 0x1246e)
+ return true;
+ if (codePoint >= 0x12480 && codePoint <= 0x12543)
+ return true;
+ if (codePoint >= 0x13000 && codePoint <= 0x1342e)
+ return true;
+ if (codePoint >= 0x14400 && codePoint <= 0x14646)
+ return true;
+ if (codePoint >= 0x16800 && codePoint <= 0x16a38)
+ return true;
+ if (codePoint >= 0x16a40 && codePoint <= 0x16a5e)
+ return true;
+ if (codePoint >= 0x16ad0 && codePoint <= 0x16aed)
+ return true;
+ if (codePoint >= 0x16b00 && codePoint <= 0x16b2f)
+ return true;
+ if (codePoint >= 0x16b40 && codePoint <= 0x16b43)
+ return true;
+ if (codePoint >= 0x16b63 && codePoint <= 0x16b77)
+ return true;
+ if (codePoint >= 0x16b7d && codePoint <= 0x16b8f)
+ return true;
+ if (codePoint >= 0x16f00 && codePoint <= 0x16f44)
+ return true;
+ if (codePoint >= 0x16f50 && codePoint <= 0x16f50)
+ return true;
+ if (codePoint >= 0x16f93 && codePoint <= 0x16f9f)
+ return true;
+ if (codePoint >= 0x16fe0 && codePoint <= 0x16fe0)
+ return true;
+ if (codePoint >= 0x17000 && codePoint <= 0x187ec)
+ return true;
+ if (codePoint >= 0x18800 && codePoint <= 0x18af2)
+ return true;
+ if (codePoint >= 0x1b000 && codePoint <= 0x1b001)
+ return true;
+ if (codePoint >= 0x1bc00 && codePoint <= 0x1bc6a)
+ return true;
+ if (codePoint >= 0x1bc70 && codePoint <= 0x1bc7c)
+ return true;
+ if (codePoint >= 0x1bc80 && codePoint <= 0x1bc88)
+ return true;
+ if (codePoint >= 0x1bc90 && codePoint <= 0x1bc99)
+ return true;
+ if (codePoint >= 0x1d400 && codePoint <= 0x1d454)
+ return true;
+ if (codePoint >= 0x1d456 && codePoint <= 0x1d49c)
+ return true;
+ if (codePoint >= 0x1d49e && codePoint <= 0x1d49f)
+ return true;
+ if (codePoint >= 0x1d4a2 && codePoint <= 0x1d4a2)
+ return true;
+ if (codePoint >= 0x1d4a5 && codePoint <= 0x1d4a6)
+ return true;
+ if (codePoint >= 0x1d4a9 && codePoint <= 0x1d4ac)
+ return true;
+ if (codePoint >= 0x1d4ae && codePoint <= 0x1d4b9)
+ return true;
+ if (codePoint >= 0x1d4bb && codePoint <= 0x1d4bb)
+ return true;
+ if (codePoint >= 0x1d4bd && codePoint <= 0x1d4c3)
+ return true;
+ if (codePoint >= 0x1d4c5 && codePoint <= 0x1d505)
+ return true;
+ if (codePoint >= 0x1d507 && codePoint <= 0x1d50a)
+ return true;
+ if (codePoint >= 0x1d50d && codePoint <= 0x1d514)
+ return true;
+ if (codePoint >= 0x1d516 && codePoint <= 0x1d51c)
+ return true;
+ if (codePoint >= 0x1d51e && codePoint <= 0x1d539)
+ return true;
+ if (codePoint >= 0x1d53b && codePoint <= 0x1d53e)
+ return true;
+ if (codePoint >= 0x1d540 && codePoint <= 0x1d544)
+ return true;
+ if (codePoint >= 0x1d546 && codePoint <= 0x1d546)
+ return true;
+ if (codePoint >= 0x1d54a && codePoint <= 0x1d550)
+ return true;
+ if (codePoint >= 0x1d552 && codePoint <= 0x1d6a5)
+ return true;
+ if (codePoint >= 0x1d6a8 && codePoint <= 0x1d6c0)
+ return true;
+ if (codePoint >= 0x1d6c2 && codePoint <= 0x1d6da)
+ return true;
+ if (codePoint >= 0x1d6dc && codePoint <= 0x1d6fa)
+ return true;
+ if (codePoint >= 0x1d6fc && codePoint <= 0x1d714)
+ return true;
+ if (codePoint >= 0x1d716 && codePoint <= 0x1d734)
+ return true;
+ if (codePoint >= 0x1d736 && codePoint <= 0x1d74e)
+ return true;
+ if (codePoint >= 0x1d750 && codePoint <= 0x1d76e)
+ return true;
+ if (codePoint >= 0x1d770 && codePoint <= 0x1d788)
+ return true;
+ if (codePoint >= 0x1d78a && codePoint <= 0x1d7a8)
+ return true;
+ if (codePoint >= 0x1d7aa && codePoint <= 0x1d7c2)
+ return true;
+ if (codePoint >= 0x1d7c4 && codePoint <= 0x1d7cb)
+ return true;
+ if (codePoint >= 0x1e800 && codePoint <= 0x1e8c4)
+ return true;
+ if (codePoint >= 0x1e900 && codePoint <= 0x1e943)
+ return true;
+ if (codePoint >= 0x1ee00 && codePoint <= 0x1ee03)
+ return true;
+ if (codePoint >= 0x1ee05 && codePoint <= 0x1ee1f)
+ return true;
+ if (codePoint >= 0x1ee21 && codePoint <= 0x1ee22)
+ return true;
+ if (codePoint >= 0x1ee24 && codePoint <= 0x1ee24)
+ return true;
+ if (codePoint >= 0x1ee27 && codePoint <= 0x1ee27)
+ return true;
+ if (codePoint >= 0x1ee29 && codePoint <= 0x1ee32)
+ return true;
+ if (codePoint >= 0x1ee34 && codePoint <= 0x1ee37)
+ return true;
+ if (codePoint >= 0x1ee39 && codePoint <= 0x1ee39)
+ return true;
+ if (codePoint >= 0x1ee3b && codePoint <= 0x1ee3b)
+ return true;
+ if (codePoint >= 0x1ee42 && codePoint <= 0x1ee42)
+ return true;
+ if (codePoint >= 0x1ee47 && codePoint <= 0x1ee47)
+ return true;
+ if (codePoint >= 0x1ee49 && codePoint <= 0x1ee49)
+ return true;
+ if (codePoint >= 0x1ee4b && codePoint <= 0x1ee4b)
+ return true;
+ if (codePoint >= 0x1ee4d && codePoint <= 0x1ee4f)
+ return true;
+ if (codePoint >= 0x1ee51 && codePoint <= 0x1ee52)
+ return true;
+ if (codePoint >= 0x1ee54 && codePoint <= 0x1ee54)
+ return true;
+ if (codePoint >= 0x1ee57 && codePoint <= 0x1ee57)
+ return true;
+ if (codePoint >= 0x1ee59 && codePoint <= 0x1ee59)
+ return true;
+ if (codePoint >= 0x1ee5b && codePoint <= 0x1ee5b)
+ return true;
+ if (codePoint >= 0x1ee5d && codePoint <= 0x1ee5d)
+ return true;
+ if (codePoint >= 0x1ee5f && codePoint <= 0x1ee5f)
+ return true;
+ if (codePoint >= 0x1ee61 && codePoint <= 0x1ee62)
+ return true;
+ if (codePoint >= 0x1ee64 && codePoint <= 0x1ee64)
+ return true;
+ if (codePoint >= 0x1ee67 && codePoint <= 0x1ee6a)
+ return true;
+ if (codePoint >= 0x1ee6c && codePoint <= 0x1ee72)
+ return true;
+ if (codePoint >= 0x1ee74 && codePoint <= 0x1ee77)
+ return true;
+ if (codePoint >= 0x1ee79 && codePoint <= 0x1ee7c)
+ return true;
+ if (codePoint >= 0x1ee7e && codePoint <= 0x1ee7e)
+ return true;
+ if (codePoint >= 0x1ee80 && codePoint <= 0x1ee89)
+ return true;
+ if (codePoint >= 0x1ee8b && codePoint <= 0x1ee9b)
+ return true;
+ if (codePoint >= 0x1eea1 && codePoint <= 0x1eea3)
+ return true;
+ if (codePoint >= 0x1eea5 && codePoint <= 0x1eea9)
+ return true;
+ if (codePoint >= 0x1eeab && codePoint <= 0x1eebb)
+ return true;
+ if (codePoint >= 0x20000 && codePoint <= 0x2a6d6)
+ return true;
+ if (codePoint >= 0x2a700 && codePoint <= 0x2b734)
+ return true;
+ if (codePoint >= 0x2b740 && codePoint <= 0x2b81d)
+ return true;
+ if (codePoint >= 0x2b820 && codePoint <= 0x2cea1)
+ return true;
+ if (codePoint >= 0x2f800 && codePoint <= 0x2fa1d)
+ return true;
+ return false;
+}
+
+bool
+js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint)
+{
+ if (codePoint >= 0x10000 && codePoint <= 0x1000b)
+ return true;
+ if (codePoint >= 0x1000d && codePoint <= 0x10026)
+ return true;
+ if (codePoint >= 0x10028 && codePoint <= 0x1003a)
+ return true;
+ if (codePoint >= 0x1003c && codePoint <= 0x1003d)
+ return true;
+ if (codePoint >= 0x1003f && codePoint <= 0x1004d)
+ return true;
+ if (codePoint >= 0x10050 && codePoint <= 0x1005d)
+ return true;
+ if (codePoint >= 0x10080 && codePoint <= 0x100fa)
+ return true;
+ if (codePoint >= 0x10140 && codePoint <= 0x10174)
+ return true;
+ if (codePoint >= 0x101fd && codePoint <= 0x101fd)
+ return true;
+ if (codePoint >= 0x10280 && codePoint <= 0x1029c)
+ return true;
+ if (codePoint >= 0x102a0 && codePoint <= 0x102d0)
+ return true;
+ if (codePoint >= 0x102e0 && codePoint <= 0x102e0)
+ return true;
+ if (codePoint >= 0x10300 && codePoint <= 0x1031f)
+ return true;
+ if (codePoint >= 0x10330 && codePoint <= 0x1034a)
+ return true;
+ if (codePoint >= 0x10350 && codePoint <= 0x1037a)
+ return true;
+ if (codePoint >= 0x10380 && codePoint <= 0x1039d)
+ return true;
+ if (codePoint >= 0x103a0 && codePoint <= 0x103c3)
+ return true;
+ if (codePoint >= 0x103c8 && codePoint <= 0x103cf)
+ return true;
+ if (codePoint >= 0x103d1 && codePoint <= 0x103d5)
+ return true;
+ if (codePoint >= 0x10400 && codePoint <= 0x1049d)
+ return true;
+ if (codePoint >= 0x104a0 && codePoint <= 0x104a9)
+ return true;
+ if (codePoint >= 0x104b0 && codePoint <= 0x104d3)
+ return true;
+ if (codePoint >= 0x104d8 && codePoint <= 0x104fb)
+ return true;
+ if (codePoint >= 0x10500 && codePoint <= 0x10527)
+ return true;
+ if (codePoint >= 0x10530 && codePoint <= 0x10563)
+ return true;
+ if (codePoint >= 0x10600 && codePoint <= 0x10736)
+ return true;
+ if (codePoint >= 0x10740 && codePoint <= 0x10755)
+ return true;
+ if (codePoint >= 0x10760 && codePoint <= 0x10767)
+ return true;
+ if (codePoint >= 0x10800 && codePoint <= 0x10805)
+ return true;
+ if (codePoint >= 0x10808 && codePoint <= 0x10808)
+ return true;
+ if (codePoint >= 0x1080a && codePoint <= 0x10835)
+ return true;
+ if (codePoint >= 0x10837 && codePoint <= 0x10838)
+ return true;
+ if (codePoint >= 0x1083c && codePoint <= 0x1083c)
+ return true;
+ if (codePoint >= 0x1083f && codePoint <= 0x10855)
+ return true;
+ if (codePoint >= 0x10860 && codePoint <= 0x10876)
+ return true;
+ if (codePoint >= 0x10880 && codePoint <= 0x1089e)
+ return true;
+ if (codePoint >= 0x108e0 && codePoint <= 0x108f2)
+ return true;
+ if (codePoint >= 0x108f4 && codePoint <= 0x108f5)
+ return true;
+ if (codePoint >= 0x10900 && codePoint <= 0x10915)
+ return true;
+ if (codePoint >= 0x10920 && codePoint <= 0x10939)
+ return true;
+ if (codePoint >= 0x10980 && codePoint <= 0x109b7)
+ return true;
+ if (codePoint >= 0x109be && codePoint <= 0x109bf)
+ return true;
+ if (codePoint >= 0x10a00 && codePoint <= 0x10a03)
+ return true;
+ if (codePoint >= 0x10a05 && codePoint <= 0x10a06)
+ return true;
+ if (codePoint >= 0x10a0c && codePoint <= 0x10a13)
+ return true;
+ if (codePoint >= 0x10a15 && codePoint <= 0x10a17)
+ return true;
+ if (codePoint >= 0x10a19 && codePoint <= 0x10a33)
+ return true;
+ if (codePoint >= 0x10a38 && codePoint <= 0x10a3a)
+ return true;
+ if (codePoint >= 0x10a3f && codePoint <= 0x10a3f)
+ return true;
+ if (codePoint >= 0x10a60 && codePoint <= 0x10a7c)
+ return true;
+ if (codePoint >= 0x10a80 && codePoint <= 0x10a9c)
+ return true;
+ if (codePoint >= 0x10ac0 && codePoint <= 0x10ac7)
+ return true;
+ if (codePoint >= 0x10ac9 && codePoint <= 0x10ae6)
+ return true;
+ if (codePoint >= 0x10b00 && codePoint <= 0x10b35)
+ return true;
+ if (codePoint >= 0x10b40 && codePoint <= 0x10b55)
+ return true;
+ if (codePoint >= 0x10b60 && codePoint <= 0x10b72)
+ return true;
+ if (codePoint >= 0x10b80 && codePoint <= 0x10b91)
+ return true;
+ if (codePoint >= 0x10c00 && codePoint <= 0x10c48)
+ return true;
+ if (codePoint >= 0x10c80 && codePoint <= 0x10cb2)
+ return true;
+ if (codePoint >= 0x10cc0 && codePoint <= 0x10cf2)
+ return true;
+ if (codePoint >= 0x11000 && codePoint <= 0x11046)
+ return true;
+ if (codePoint >= 0x11066 && codePoint <= 0x1106f)
+ return true;
+ if (codePoint >= 0x1107f && codePoint <= 0x110ba)
+ return true;
+ if (codePoint >= 0x110d0 && codePoint <= 0x110e8)
+ return true;
+ if (codePoint >= 0x110f0 && codePoint <= 0x110f9)
+ return true;
+ if (codePoint >= 0x11100 && codePoint <= 0x11134)
+ return true;
+ if (codePoint >= 0x11136 && codePoint <= 0x1113f)
+ return true;
+ if (codePoint >= 0x11150 && codePoint <= 0x11173)
+ return true;
+ if (codePoint >= 0x11176 && codePoint <= 0x11176)
+ return true;
+ if (codePoint >= 0x11180 && codePoint <= 0x111c4)
+ return true;
+ if (codePoint >= 0x111ca && codePoint <= 0x111cc)
+ return true;
+ if (codePoint >= 0x111d0 && codePoint <= 0x111da)
+ return true;
+ if (codePoint >= 0x111dc && codePoint <= 0x111dc)
+ return true;
+ if (codePoint >= 0x11200 && codePoint <= 0x11211)
+ return true;
+ if (codePoint >= 0x11213 && codePoint <= 0x11237)
+ return true;
+ if (codePoint >= 0x1123e && codePoint <= 0x1123e)
+ return true;
+ if (codePoint >= 0x11280 && codePoint <= 0x11286)
+ return true;
+ if (codePoint >= 0x11288 && codePoint <= 0x11288)
+ return true;
+ if (codePoint >= 0x1128a && codePoint <= 0x1128d)
+ return true;
+ if (codePoint >= 0x1128f && codePoint <= 0x1129d)
+ return true;
+ if (codePoint >= 0x1129f && codePoint <= 0x112a8)
+ return true;
+ if (codePoint >= 0x112b0 && codePoint <= 0x112ea)
+ return true;
+ if (codePoint >= 0x112f0 && codePoint <= 0x112f9)
+ return true;
+ if (codePoint >= 0x11300 && codePoint <= 0x11303)
+ return true;
+ if (codePoint >= 0x11305 && codePoint <= 0x1130c)
+ return true;
+ if (codePoint >= 0x1130f && codePoint <= 0x11310)
+ return true;
+ if (codePoint >= 0x11313 && codePoint <= 0x11328)
+ return true;
+ if (codePoint >= 0x1132a && codePoint <= 0x11330)
+ return true;
+ if (codePoint >= 0x11332 && codePoint <= 0x11333)
+ return true;
+ if (codePoint >= 0x11335 && codePoint <= 0x11339)
+ return true;
+ if (codePoint >= 0x1133c && codePoint <= 0x11344)
+ return true;
+ if (codePoint >= 0x11347 && codePoint <= 0x11348)
+ return true;
+ if (codePoint >= 0x1134b && codePoint <= 0x1134d)
+ return true;
+ if (codePoint >= 0x11350 && codePoint <= 0x11350)
+ return true;
+ if (codePoint >= 0x11357 && codePoint <= 0x11357)
+ return true;
+ if (codePoint >= 0x1135d && codePoint <= 0x11363)
+ return true;
+ if (codePoint >= 0x11366 && codePoint <= 0x1136c)
+ return true;
+ if (codePoint >= 0x11370 && codePoint <= 0x11374)
+ return true;
+ if (codePoint >= 0x11400 && codePoint <= 0x1144a)
+ return true;
+ if (codePoint >= 0x11450 && codePoint <= 0x11459)
+ return true;
+ if (codePoint >= 0x11480 && codePoint <= 0x114c5)
+ return true;
+ if (codePoint >= 0x114c7 && codePoint <= 0x114c7)
+ return true;
+ if (codePoint >= 0x114d0 && codePoint <= 0x114d9)
+ return true;
+ if (codePoint >= 0x11580 && codePoint <= 0x115b5)
+ return true;
+ if (codePoint >= 0x115b8 && codePoint <= 0x115c0)
+ return true;
+ if (codePoint >= 0x115d8 && codePoint <= 0x115dd)
+ return true;
+ if (codePoint >= 0x11600 && codePoint <= 0x11640)
+ return true;
+ if (codePoint >= 0x11644 && codePoint <= 0x11644)
+ return true;
+ if (codePoint >= 0x11650 && codePoint <= 0x11659)
+ return true;
+ if (codePoint >= 0x11680 && codePoint <= 0x116b7)
+ return true;
+ if (codePoint >= 0x116c0 && codePoint <= 0x116c9)
+ return true;
+ if (codePoint >= 0x11700 && codePoint <= 0x11719)
+ return true;
+ if (codePoint >= 0x1171d && codePoint <= 0x1172b)
+ return true;
+ if (codePoint >= 0x11730 && codePoint <= 0x11739)
+ return true;
+ if (codePoint >= 0x118a0 && codePoint <= 0x118e9)
+ return true;
+ if (codePoint >= 0x118ff && codePoint <= 0x118ff)
+ return true;
+ if (codePoint >= 0x11ac0 && codePoint <= 0x11af8)
+ return true;
+ if (codePoint >= 0x11c00 && codePoint <= 0x11c08)
+ return true;
+ if (codePoint >= 0x11c0a && codePoint <= 0x11c36)
+ return true;
+ if (codePoint >= 0x11c38 && codePoint <= 0x11c40)
+ return true;
+ if (codePoint >= 0x11c50 && codePoint <= 0x11c59)
+ return true;
+ if (codePoint >= 0x11c72 && codePoint <= 0x11c8f)
+ return true;
+ if (codePoint >= 0x11c92 && codePoint <= 0x11ca7)
+ return true;
+ if (codePoint >= 0x11ca9 && codePoint <= 0x11cb6)
+ return true;
+ if (codePoint >= 0x12000 && codePoint <= 0x12399)
+ return true;
+ if (codePoint >= 0x12400 && codePoint <= 0x1246e)
+ return true;
+ if (codePoint >= 0x12480 && codePoint <= 0x12543)
+ return true;
+ if (codePoint >= 0x13000 && codePoint <= 0x1342e)
+ return true;
+ if (codePoint >= 0x14400 && codePoint <= 0x14646)
+ return true;
+ if (codePoint >= 0x16800 && codePoint <= 0x16a38)
+ return true;
+ if (codePoint >= 0x16a40 && codePoint <= 0x16a5e)
+ return true;
+ if (codePoint >= 0x16a60 && codePoint <= 0x16a69)
+ return true;
+ if (codePoint >= 0x16ad0 && codePoint <= 0x16aed)
+ return true;
+ if (codePoint >= 0x16af0 && codePoint <= 0x16af4)
+ return true;
+ if (codePoint >= 0x16b00 && codePoint <= 0x16b36)
+ return true;
+ if (codePoint >= 0x16b40 && codePoint <= 0x16b43)
+ return true;
+ if (codePoint >= 0x16b50 && codePoint <= 0x16b59)
+ return true;
+ if (codePoint >= 0x16b63 && codePoint <= 0x16b77)
+ return true;
+ if (codePoint >= 0x16b7d && codePoint <= 0x16b8f)
+ return true;
+ if (codePoint >= 0x16f00 && codePoint <= 0x16f44)
+ return true;
+ if (codePoint >= 0x16f50 && codePoint <= 0x16f7e)
+ return true;
+ if (codePoint >= 0x16f8f && codePoint <= 0x16f9f)
+ return true;
+ if (codePoint >= 0x16fe0 && codePoint <= 0x16fe0)
+ return true;
+ if (codePoint >= 0x17000 && codePoint <= 0x187ec)
+ return true;
+ if (codePoint >= 0x18800 && codePoint <= 0x18af2)
+ return true;
+ if (codePoint >= 0x1b000 && codePoint <= 0x1b001)
+ return true;
+ if (codePoint >= 0x1bc00 && codePoint <= 0x1bc6a)
+ return true;
+ if (codePoint >= 0x1bc70 && codePoint <= 0x1bc7c)
+ return true;
+ if (codePoint >= 0x1bc80 && codePoint <= 0x1bc88)
+ return true;
+ if (codePoint >= 0x1bc90 && codePoint <= 0x1bc99)
+ return true;
+ if (codePoint >= 0x1bc9d && codePoint <= 0x1bc9e)
+ return true;
+ if (codePoint >= 0x1d165 && codePoint <= 0x1d169)
+ return true;
+ if (codePoint >= 0x1d16d && codePoint <= 0x1d172)
+ return true;
+ if (codePoint >= 0x1d17b && codePoint <= 0x1d182)
+ return true;
+ if (codePoint >= 0x1d185 && codePoint <= 0x1d18b)
+ return true;
+ if (codePoint >= 0x1d1aa && codePoint <= 0x1d1ad)
+ return true;
+ if (codePoint >= 0x1d242 && codePoint <= 0x1d244)
+ return true;
+ if (codePoint >= 0x1d400 && codePoint <= 0x1d454)
+ return true;
+ if (codePoint >= 0x1d456 && codePoint <= 0x1d49c)
+ return true;
+ if (codePoint >= 0x1d49e && codePoint <= 0x1d49f)
+ return true;
+ if (codePoint >= 0x1d4a2 && codePoint <= 0x1d4a2)
+ return true;
+ if (codePoint >= 0x1d4a5 && codePoint <= 0x1d4a6)
+ return true;
+ if (codePoint >= 0x1d4a9 && codePoint <= 0x1d4ac)
+ return true;
+ if (codePoint >= 0x1d4ae && codePoint <= 0x1d4b9)
+ return true;
+ if (codePoint >= 0x1d4bb && codePoint <= 0x1d4bb)
+ return true;
+ if (codePoint >= 0x1d4bd && codePoint <= 0x1d4c3)
+ return true;
+ if (codePoint >= 0x1d4c5 && codePoint <= 0x1d505)
+ return true;
+ if (codePoint >= 0x1d507 && codePoint <= 0x1d50a)
+ return true;
+ if (codePoint >= 0x1d50d && codePoint <= 0x1d514)
+ return true;
+ if (codePoint >= 0x1d516 && codePoint <= 0x1d51c)
+ return true;
+ if (codePoint >= 0x1d51e && codePoint <= 0x1d539)
+ return true;
+ if (codePoint >= 0x1d53b && codePoint <= 0x1d53e)
+ return true;
+ if (codePoint >= 0x1d540 && codePoint <= 0x1d544)
+ return true;
+ if (codePoint >= 0x1d546 && codePoint <= 0x1d546)
+ return true;
+ if (codePoint >= 0x1d54a && codePoint <= 0x1d550)
+ return true;
+ if (codePoint >= 0x1d552 && codePoint <= 0x1d6a5)
+ return true;
+ if (codePoint >= 0x1d6a8 && codePoint <= 0x1d6c0)
+ return true;
+ if (codePoint >= 0x1d6c2 && codePoint <= 0x1d6da)
+ return true;
+ if (codePoint >= 0x1d6dc && codePoint <= 0x1d6fa)
+ return true;
+ if (codePoint >= 0x1d6fc && codePoint <= 0x1d714)
+ return true;
+ if (codePoint >= 0x1d716 && codePoint <= 0x1d734)
+ return true;
+ if (codePoint >= 0x1d736 && codePoint <= 0x1d74e)
+ return true;
+ if (codePoint >= 0x1d750 && codePoint <= 0x1d76e)
+ return true;
+ if (codePoint >= 0x1d770 && codePoint <= 0x1d788)
+ return true;
+ if (codePoint >= 0x1d78a && codePoint <= 0x1d7a8)
+ return true;
+ if (codePoint >= 0x1d7aa && codePoint <= 0x1d7c2)
+ return true;
+ if (codePoint >= 0x1d7c4 && codePoint <= 0x1d7cb)
+ return true;
+ if (codePoint >= 0x1d7ce && codePoint <= 0x1d7ff)
+ return true;
+ if (codePoint >= 0x1da00 && codePoint <= 0x1da36)
+ return true;
+ if (codePoint >= 0x1da3b && codePoint <= 0x1da6c)
+ return true;
+ if (codePoint >= 0x1da75 && codePoint <= 0x1da75)
+ return true;
+ if (codePoint >= 0x1da84 && codePoint <= 0x1da84)
+ return true;
+ if (codePoint >= 0x1da9b && codePoint <= 0x1da9f)
+ return true;
+ if (codePoint >= 0x1daa1 && codePoint <= 0x1daaf)
+ return true;
+ if (codePoint >= 0x1e000 && codePoint <= 0x1e006)
+ return true;
+ if (codePoint >= 0x1e008 && codePoint <= 0x1e018)
+ return true;
+ if (codePoint >= 0x1e01b && codePoint <= 0x1e021)
+ return true;
+ if (codePoint >= 0x1e023 && codePoint <= 0x1e024)
+ return true;
+ if (codePoint >= 0x1e026 && codePoint <= 0x1e02a)
+ return true;
+ if (codePoint >= 0x1e800 && codePoint <= 0x1e8c4)
+ return true;
+ if (codePoint >= 0x1e8d0 && codePoint <= 0x1e8d6)
+ return true;
+ if (codePoint >= 0x1e900 && codePoint <= 0x1e94a)
+ return true;
+ if (codePoint >= 0x1e950 && codePoint <= 0x1e959)
+ return true;
+ if (codePoint >= 0x1ee00 && codePoint <= 0x1ee03)
+ return true;
+ if (codePoint >= 0x1ee05 && codePoint <= 0x1ee1f)
+ return true;
+ if (codePoint >= 0x1ee21 && codePoint <= 0x1ee22)
+ return true;
+ if (codePoint >= 0x1ee24 && codePoint <= 0x1ee24)
+ return true;
+ if (codePoint >= 0x1ee27 && codePoint <= 0x1ee27)
+ return true;
+ if (codePoint >= 0x1ee29 && codePoint <= 0x1ee32)
+ return true;
+ if (codePoint >= 0x1ee34 && codePoint <= 0x1ee37)
+ return true;
+ if (codePoint >= 0x1ee39 && codePoint <= 0x1ee39)
+ return true;
+ if (codePoint >= 0x1ee3b && codePoint <= 0x1ee3b)
+ return true;
+ if (codePoint >= 0x1ee42 && codePoint <= 0x1ee42)
+ return true;
+ if (codePoint >= 0x1ee47 && codePoint <= 0x1ee47)
+ return true;
+ if (codePoint >= 0x1ee49 && codePoint <= 0x1ee49)
+ return true;
+ if (codePoint >= 0x1ee4b && codePoint <= 0x1ee4b)
+ return true;
+ if (codePoint >= 0x1ee4d && codePoint <= 0x1ee4f)
+ return true;
+ if (codePoint >= 0x1ee51 && codePoint <= 0x1ee52)
+ return true;
+ if (codePoint >= 0x1ee54 && codePoint <= 0x1ee54)
+ return true;
+ if (codePoint >= 0x1ee57 && codePoint <= 0x1ee57)
+ return true;
+ if (codePoint >= 0x1ee59 && codePoint <= 0x1ee59)
+ return true;
+ if (codePoint >= 0x1ee5b && codePoint <= 0x1ee5b)
+ return true;
+ if (codePoint >= 0x1ee5d && codePoint <= 0x1ee5d)
+ return true;
+ if (codePoint >= 0x1ee5f && codePoint <= 0x1ee5f)
+ return true;
+ if (codePoint >= 0x1ee61 && codePoint <= 0x1ee62)
+ return true;
+ if (codePoint >= 0x1ee64 && codePoint <= 0x1ee64)
+ return true;
+ if (codePoint >= 0x1ee67 && codePoint <= 0x1ee6a)
+ return true;
+ if (codePoint >= 0x1ee6c && codePoint <= 0x1ee72)
+ return true;
+ if (codePoint >= 0x1ee74 && codePoint <= 0x1ee77)
+ return true;
+ if (codePoint >= 0x1ee79 && codePoint <= 0x1ee7c)
+ return true;
+ if (codePoint >= 0x1ee7e && codePoint <= 0x1ee7e)
+ return true;
+ if (codePoint >= 0x1ee80 && codePoint <= 0x1ee89)
+ return true;
+ if (codePoint >= 0x1ee8b && codePoint <= 0x1ee9b)
+ return true;
+ if (codePoint >= 0x1eea1 && codePoint <= 0x1eea3)
+ return true;
+ if (codePoint >= 0x1eea5 && codePoint <= 0x1eea9)
+ return true;
+ if (codePoint >= 0x1eeab && codePoint <= 0x1eebb)
+ return true;
+ if (codePoint >= 0x20000 && codePoint <= 0x2a6d6)
+ return true;
+ if (codePoint >= 0x2a700 && codePoint <= 0x2b734)
+ return true;
+ if (codePoint >= 0x2b740 && codePoint <= 0x2b81d)
+ return true;
+ if (codePoint >= 0x2b820 && codePoint <= 0x2cea1)
+ return true;
+ if (codePoint >= 0x2f800 && codePoint <= 0x2fa1d)
+ return true;
+ if (codePoint >= 0xe0100 && codePoint <= 0xe01ef)
+ return true;
+ return false;
+}
diff --git a/js/src/vm/Unicode.h b/js/src/vm/Unicode.h
index 8b538d06de..bdac848fbd 100644
--- a/js/src/vm/Unicode.h
+++ b/js/src/vm/Unicode.h
@@ -143,11 +143,15 @@ IsIdentifierStart(char16_t ch)
return CharInfo(ch).isUnicodeIDStart();
}
+bool
+IsIdentifierStartNonBMP(uint32_t codePoint);
+
inline bool
IsIdentifierStart(uint32_t codePoint)
{
- // TODO: Supplemental code points not yet supported (bug 1197230).
- return codePoint <= UTF16Max && IsIdentifierStart(char16_t(codePoint));
+ if (MOZ_UNLIKELY(codePoint > UTF16Max))
+ return IsIdentifierStartNonBMP(codePoint);
+ return IsIdentifierStart(char16_t(codePoint));
}
inline bool
@@ -170,11 +174,16 @@ IsIdentifierPart(char16_t ch)
return CharInfo(ch).isUnicodeIDContinue();
}
+
+bool
+IsIdentifierPartNonBMP(uint32_t codePoint);
+
inline bool
IsIdentifierPart(uint32_t codePoint)
{
- // TODO: Supplemental code points not yet supported (bug 1197230).
- return codePoint <= UTF16Max && IsIdentifierPart(char16_t(codePoint));
+ if (MOZ_UNLIKELY(codePoint > UTF16Max))
+ return IsIdentifierPartNonBMP(codePoint);
+ return IsIdentifierPart(char16_t(codePoint));
}
inline bool
@@ -183,6 +192,17 @@ IsUnicodeIDStart(char16_t ch)
return CharInfo(ch).isUnicodeIDStart();
}
+bool
+IsUnicodeIDStartNonBMP(uint32_t codePoint);
+
+inline bool
+IsUnicodeIDStart(uint32_t codePoint)
+{
+ if (MOZ_UNLIKELY(codePoint > UTF16Max))
+ return IsIdentifierStartNonBMP(codePoint);
+ return IsUnicodeIDStart(char16_t(codePoint));
+}
+
inline bool
IsSpace(char16_t ch)
{
diff --git a/js/src/vm/UnicodeNonBMP.h b/js/src/vm/UnicodeNonBMP.h
index 6cc64cde4a..f99e227cdd 100644
--- a/js/src/vm/UnicodeNonBMP.h
+++ b/js/src/vm/UnicodeNonBMP.h
@@ -10,6 +10,16 @@
#ifndef vm_UnicodeNonBMP_h
#define vm_UnicodeNonBMP_h
+// |macro| receives the following arguments
+// macro(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF)
+// FROM: code point where the range starts
+// TO: code point where the range ends
+// LEAD: common lead surrogate of FROM and TO
+// TRAIL_FROM: trail surrogate of FROM
+// TRAIL_FROM: trail surrogate of TO
+// DIFF: the difference between the code point in the range and
+// converted code point
+
#define FOR_EACH_NON_BMP_LOWERCASE(macro) \
macro(0x10400, 0x10427, 0xd801, 0xdc00, 0xdc27, 40) \
macro(0x104b0, 0x104d3, 0xd801, 0xdcb0, 0xdcd3, 40) \
diff --git a/js/src/vm/make_unicode.py b/js/src/vm/make_unicode.py
index 73c090ac91..83f0d004b3 100755
--- a/js/src/vm/make_unicode.py
+++ b/js/src/vm/make_unicode.py
@@ -155,37 +155,65 @@ def utf16_encode(code):
return lead, trail
def make_non_bmp_convert_macro(out_file, name, convert_map):
+ # Find continuous range in convert_map.
convert_list = []
entry = None
for code in sorted(convert_map.keys()):
+ lead, trail = utf16_encode(code)
converted = convert_map[code]
diff = converted - code
- if entry and code == entry['code'] + entry['length'] and diff == entry['diff']:
+ if (entry and code == entry['code'] + entry['length'] and
+ diff == entry['diff'] and lead == entry['lead']):
+
entry['length'] += 1
continue
- entry = { 'code': code, 'diff': diff, 'length': 1 }
+ entry = {
+ 'code': code,
+ 'diff': diff,
+ 'length': 1,
+ 'lead': lead,
+ 'trail': trail,
+ }
convert_list.append(entry)
+ # Generate macro call for each range.
lines = []
for entry in convert_list:
from_code = entry['code']
to_code = entry['code'] + entry['length'] - 1
diff = entry['diff']
- from_lead, from_trail = utf16_encode(from_code)
- to_lead, to_trail = utf16_encode(to_code)
-
- assert from_lead == to_lead
+ lead = entry['lead']
+ from_trail = entry['trail']
+ to_trail = entry['trail'] + entry['length'] - 1
lines.append(' macro(0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}, {:d})'.format(
- from_code, to_code, from_lead, from_trail, to_trail, diff))
+ from_code, to_code, lead, from_trail, to_trail, diff))
out_file.write('#define FOR_EACH_NON_BMP_{}(macro) \\\n'.format(name))
out_file.write(' \\\n'.join(lines))
out_file.write('\n')
+def for_each_non_bmp_group(group_set):
+ # Find continuous range in group_set.
+ group_list = []
+ entry = None
+ for code in sorted(group_set.keys()):
+ if entry and code == entry['code'] + entry['length']:
+ entry['length'] += 1
+ continue
+
+ entry = {
+ 'code': code,
+ 'length': 1
+ }
+ group_list.append(entry)
+
+ for entry in group_list:
+ yield (entry['code'], entry['code'] + entry['length'] - 1)
+
def process_derived_core_properties(derived_core_properties):
id_start = set()
id_continue = set()
@@ -214,6 +242,9 @@ def process_unicode_data(unicode_data, derived_core_properties):
non_bmp_lower_map = {}
non_bmp_upper_map = {}
+ non_bmp_id_start_set = {}
+ non_bmp_id_cont_set = {}
+ non_bmp_space_set = {}
(id_start, id_continue) = process_derived_core_properties(derived_core_properties)
@@ -246,6 +277,13 @@ def process_unicode_data(unicode_data, derived_core_properties):
non_bmp_lower_map[code] = lower
if code != upper:
non_bmp_upper_map[code] = upper
+ if category == 'Zs':
+ non_bmp_space_set[code] = 1
+ test_space_table.append(code)
+ if code in id_start:
+ non_bmp_id_start_set[code] = 1
+ if code in id_continue:
+ non_bmp_id_cont_set[code] = 1
continue
# we combine whitespace and lineterminators because in pratice we don't need them separated
@@ -315,6 +353,8 @@ def process_unicode_data(unicode_data, derived_core_properties):
table, index,
same_upper_table, same_upper_index,
non_bmp_lower_map, non_bmp_upper_map,
+ non_bmp_space_set,
+ non_bmp_id_start_set, non_bmp_id_cont_set,
test_table, test_space_table,
)
@@ -412,6 +452,16 @@ def make_non_bmp_file(version,
#ifndef vm_UnicodeNonBMP_h
#define vm_UnicodeNonBMP_h
+// |macro| receives the following arguments
+// macro(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF)
+// FROM: code point where the range starts
+// TO: code point where the range ends
+// LEAD: common lead surrogate of FROM and TO
+// TRAIL_FROM: trail surrogate of FROM
+// TRAIL_FROM: trail surrogate of TO
+// DIFF: the difference between the code point in the range and
+// converted code point
+
""")
make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map)
@@ -525,7 +575,9 @@ if (typeof reportCompare === "function")
def make_unicode_file(version,
table, index,
same_upper_table, same_upper_index,
- folding_table, folding_index):
+ folding_table, folding_index,
+ non_bmp_space_set,
+ non_bmp_id_start_set, non_bmp_id_cont_set):
index1, index2, shift = splitbins(index)
# Don't forget to update CharInfo in Unicode.h if you need to change this
@@ -682,6 +734,43 @@ def make_unicode_file(version,
dump(folding_index2, 'folding_index2', data_file)
data_file.write('\n')
+ # If the following assert fails, it means space character is added to
+ # non-BMP area. In that case the following code should be uncommented
+ # and the corresponding code should be added to frontend.
+ assert len(non_bmp_space_set.keys()) == 0
+
+ data_file.write("""\
+bool
+js::unicode::IsIdentifierStartNonBMP(uint32_t codePoint)
+{
+""")
+
+ for (from_code, to_code) in for_each_non_bmp_group(non_bmp_id_start_set):
+ data_file.write("""\
+ if (codePoint >= 0x{:x} && codePoint <= 0x{:x})
+ return true;
+""".format(from_code, to_code))
+
+ data_file.write("""\
+ return false;
+}
+
+bool
+js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint)
+{
+""")
+
+ for (from_code, to_code) in for_each_non_bmp_group(non_bmp_id_cont_set):
+ data_file.write("""\
+ if (codePoint >= 0x{:x} && codePoint <= 0x{:x})
+ return true;
+""".format(from_code, to_code))
+
+ data_file.write("""\
+ return false;
+}
+""")
+
def getsize(data):
""" return smallest possible integer size for the given array """
maxdata = max(data)
@@ -1000,6 +1089,8 @@ def update_unicode(args):
table, index,
same_upper_table, same_upper_index,
non_bmp_lower_map, non_bmp_upper_map,
+ non_bmp_space_set,
+ non_bmp_id_start_set, non_bmp_id_cont_set,
test_table, test_space_table
) = process_unicode_data(unicode_data, derived_core_properties)
(
@@ -1012,7 +1103,9 @@ def update_unicode(args):
make_unicode_file(unicode_version,
table, index,
same_upper_table, same_upper_index,
- folding_table, folding_index)
+ folding_table, folding_index,
+ non_bmp_space_set,
+ non_bmp_id_start_set, non_bmp_id_cont_set)
make_non_bmp_file(unicode_version,
non_bmp_lower_map, non_bmp_upper_map,
non_bmp_folding_map, non_bmp_rev_folding_map)