diff options
author | Moonchild <moonchild@palemoon.org> | 2021-02-11 13:55:24 +0000 |
---|---|---|
committer | Moonchild <moonchild@palemoon.org> | 2021-02-11 13:55:24 +0000 |
commit | 9ca880e20012f560af27ae624aaf37a3b19eba99 (patch) | |
tree | f4105248670f38fc13557dd4c69e6431a9ba8007 | |
parent | 2c72b8859a959629462a58b1385408e25bb89bad (diff) | |
download | uxp-9ca880e20012f560af27ae624aaf37a3b19eba99.tar.gz |
Issue #1738 - Part 2: Implement well-formed JSON stringify
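This implements the ES2019 (well-formed JSON.stringify) rules for quoting JSON
strings: lone surrogates are escaped as \uXXXX using lower-case hex digits, while
complete surrogate pairs and other non-ASCII code units are copied unchanged.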
-rw-r--r-- | js/src/json.cpp | 69 | ||||
-rw-r--r-- | js/src/vm/Unicode.h | 13 |
2 files changed, 68 insertions, 14 deletions
diff --git a/js/src/json.cpp b/js/src/json.cpp index 73e37e2370..e32994e908 100644 --- a/js/src/json.cpp +++ b/js/src/json.cpp @@ -66,26 +66,67 @@ InfallibleQuote(RangedPtr<const SrcCharT> srcBegin, RangedPtr<const SrcCharT> sr /* Step 1. */ *dstPtr++ = '"'; + // XXX: This is a rather ugly in-line definition. Move it somewhere better? + auto ToLowerHex = [](uint8_t u) { + MOZ_ASSERT(u <= 0xF); + return "0123456789abcdef"[u]; + }; + /* Step 2. */ while (srcBegin != srcEnd) { - SrcCharT c = *srcBegin++; - size_t escapeIndex = c % sizeof(escapeLookup); - Latin1Char escaped = escapeLookup[escapeIndex]; - if (MOZ_LIKELY((escapeIndex != size_t(c)) || !escaped)) { + const SrcCharT c = *srcBegin++; + + // Handle the Latin-1 cases. + if (MOZ_LIKELY(c < sizeof(escapeLookup))) { + Latin1Char escaped = escapeLookup[c]; + + // Directly copy non-escaped code points. + if (escaped == 0) { + *dstPtr++ = c; + continue; + } + + // Escape the rest, elaborating Unicode escapes when needed. + *dstPtr++ = '\\'; + *dstPtr++ = escaped; + if (escaped == 'u') { + *dstPtr++ = '0'; + *dstPtr++ = '0'; + + uint8_t x = c >> 4; + MOZ_ASSERT(x < 10); + *dstPtr++ = '0' + x; + + *dstPtr++ = ToLowerHex(c & 0xF); + } + + continue; + } + + // Non-ASCII non-surrogates are directly copied. + if (!unicode::IsSurrogate(c)) { *dstPtr++ = c; continue; } - *dstPtr++ = '\\'; - *dstPtr++ = escaped; - if (escaped == 'u') { - MOZ_ASSERT(c < ' '); - MOZ_ASSERT((c >> 4) < 10); - uint8_t x = c >> 4, y = c % 16; - *dstPtr++ = '0'; - *dstPtr++ = '0'; - *dstPtr++ = '0' + x; - *dstPtr++ = y < 10 ? '0' + y : 'a' + (y - 10); + + // So too for complete surrogate pairs. + if (MOZ_LIKELY(unicode::IsLeadSurrogate(c) && + srcBegin < srcEnd && + unicode::IsTrailSurrogate(*srcBegin))) + { + *dstPtr++ = c; + *dstPtr++ = *srcBegin++; + continue; } + + // But lone surrogates are Unicode-escaped. 
+ char32_t as32 = char32_t(c); + *dstPtr++ = '\\'; + *dstPtr++ = 'u'; + *dstPtr++ = ToLowerHex(as32 >> 12); + *dstPtr++ = ToLowerHex((as32 >> 8) & 0xF); + *dstPtr++ = ToLowerHex((as32 >> 4) & 0xF); + *dstPtr++ = ToLowerHex(as32 & 0xF); } /* Steps 3-4. */ diff --git a/js/src/vm/Unicode.h b/js/src/vm/Unicode.h index e470f43418..b1e3e17c61 100644 --- a/js/src/vm/Unicode.h +++ b/js/src/vm/Unicode.h @@ -466,6 +466,19 @@ IsTrailSurrogate(uint32_t codePoint) return codePoint >= TrailSurrogateMin && codePoint <= TrailSurrogateMax; } +/** + * Returns true if the given value is a UTF-16 surrogate. + * + * This function is intended for use in contexts where 32-bit values may need + * to be tested to see if they reside in the surrogate range, so it doesn't + * just take char16_t. + */ +inline bool +IsSurrogate(uint32_t codePoint) +{ + return LeadSurrogateMin <= codePoint && codePoint <= TrailSurrogateMax; +} + inline char16_t LeadSurrogate(uint32_t codePoint) { |