diff options
author | Martok <martok@martoks-place.de> | 2023-06-29 23:05:33 +0200 |
---|---|---|
committer | Martok <martok@martoks-place.de> | 2023-06-29 23:05:33 +0200 |
commit | f168e0afe965d2d860e9f2ad8e2ca6cf26ec0b41 (patch) | |
tree | f2d89d26004389b9dad51896f19293915773a846 | |
parent | 9eb285a9fb89cfd64ca9c9cba77746af4547f0a4 (diff) | |
download | uxp-f168e0afe965d2d860e9f2ad8e2ca6cf26ec0b41.tar.gz |
Issue #2259 - Reimplement String.prototype.toLocale{Lower,Upper}Case per ECMAScript Intl specification
- Update make_unicode to output SpecialCasing
- Handle special casing
- Use realloc instead of malloc when resizing a newly created string buffer
Based-on: m-c 1318403, 1431957
-rw-r--r-- | config/check_spidermonkey_style.py | 1 | ||||
-rw-r--r-- | js/src/builtin/String.js | 88 | ||||
-rw-r--r-- | js/src/builtin/intl/CommonFunctions.js | 58 | ||||
-rw-r--r-- | js/src/builtin/intl/make_intl_data.py | 4 | ||||
-rw-r--r-- | js/src/jsapi.h | 4 | ||||
-rw-r--r-- | js/src/jscntxt.h | 1 | ||||
-rw-r--r-- | js/src/jsstr.cpp | 639 | ||||
-rw-r--r-- | js/src/jsstr.h | 27 | ||||
-rw-r--r-- | js/src/vm/SelfHosting.cpp | 6 | ||||
-rw-r--r-- | js/src/vm/SpecialCasing.txt | 281 | ||||
-rw-r--r-- | js/src/vm/Unicode.cpp | 2616 | ||||
-rw-r--r-- | js/src/vm/Unicode.h | 57 | ||||
-rw-r--r-- | js/src/vm/UnicodeNonBMP.h | 24 | ||||
-rwxr-xr-x | js/src/vm/make_unicode.py | 675 |
14 files changed, 3190 insertions, 1291 deletions
diff --git a/config/check_spidermonkey_style.py b/config/check_spidermonkey_style.py index cb9e2418f2..5f06e6ad93 100644 --- a/config/check_spidermonkey_style.py +++ b/config/check_spidermonkey_style.py @@ -82,6 +82,7 @@ included_inclnames_to_ignore = set([ 'unicode/plurrule.h', # ICU 'unicode/timezone.h', # ICU 'unicode/ucal.h', # ICU + 'unicode/uchar.h', # ICU 'unicode/uclean.h', # ICU 'unicode/ucol.h', # ICU 'unicode/udat.h', # ICU diff --git a/js/src/builtin/String.js b/js/src/builtin/String.js index b0928fe88c..0fab35966a 100644 --- a/js/src/builtin/String.js +++ b/js/src/builtin/String.js @@ -731,6 +731,88 @@ function String_localeCompare(that) { return intl_CompareStrings(collator, S, That); } +/** + * 13.1.2 String.prototype.toLocaleLowerCase ( [ locales ] ) + * + * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b + */ +function String_toLocaleLowerCase() { + // Step 1. + RequireObjectCoercible(this); + + // Step 2. + var string = ToString(this); + + // Handle the common cases (no locales argument or a single string + // argument) first. + var locales = arguments.length > 0 ? arguments[0] : undefined; + var requestedLocale; + if (locales === undefined) { + // Steps 3, 6. + requestedLocale = undefined; + } else if (typeof locales === "string") { + // Steps 3, 5. + requestedLocale = ValidateAndCanonicalizeLanguageTag(locales); + } else { + // Step 3. + var requestedLocales = CanonicalizeLocaleList(locales); + + // Steps 4-6. + requestedLocale = requestedLocales.length > 0 ? requestedLocales[0] : undefined; + } + + // Trivial case: When the input is empty, directly return the empty string. + if (string.length === 0) + return ""; + + if (requestedLocale === undefined) + requestedLocale = DefaultLocale(); + + // Steps 7-16. 
+ return intl_toLocaleLowerCase(string, requestedLocale); +} + +/** + * 13.1.3 String.prototype.toLocaleUpperCase ( [ locales ] ) + * + * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b + */ +function String_toLocaleUpperCase() { + // Step 1. + RequireObjectCoercible(this); + + // Step 2. + var string = ToString(this); + + // Handle the common cases (no locales argument or a single string + // argument) first. + var locales = arguments.length > 0 ? arguments[0] : undefined; + var requestedLocale; + if (locales === undefined) { + // Steps 3, 6. + requestedLocale = undefined; + } else if (typeof locales === "string") { + // Steps 3, 5. + requestedLocale = ValidateAndCanonicalizeLanguageTag(locales); + } else { + // Step 3. + var requestedLocales = CanonicalizeLocaleList(locales); + + // Steps 4-6. + requestedLocale = requestedLocales.length > 0 ? requestedLocales[0] : undefined; + } + + // Trivial case: When the input is empty, directly return the empty string. + if (string.length === 0) + return ""; + + if (requestedLocale === undefined) + requestedLocale = DefaultLocale(); + + // Steps 7-16. + return intl_toLocaleUpperCase(string, requestedLocale); +} + /* ES6 Draft May 22, 2014 21.1.2.4 */ function String_static_raw(callSite, ...substitutions) { // Step 1 (implicit). @@ -1014,13 +1096,15 @@ _SetCanonicalName(String_static_trimEnd, "trimEnd"); function String_static_toLocaleLowerCase(string) { if (arguments.length < 1) ThrowTypeError(JSMSG_MISSING_FUN_ARG, 0, 'String.toLocaleLowerCase'); - return callFunction(std_String_toLocaleLowerCase, string); + var locales = arguments.length > 1 ? arguments[1] : undefined; + return callFunction(String_toLocaleLowerCase, string, locales); } function String_static_toLocaleUpperCase(string) { if (arguments.length < 1) ThrowTypeError(JSMSG_MISSING_FUN_ARG, 0, 'String.toLocaleUpperCase'); - return callFunction(std_String_toLocaleUpperCase, string); + var locales = arguments.length > 1 ? 
arguments[1] : undefined; + return callFunction(String_toLocaleUpperCase, string, locales); } function String_static_normalize(string) { diff --git a/js/src/builtin/intl/CommonFunctions.js b/js/src/builtin/intl/CommonFunctions.js index 10e02a5ac6..c1999f001e 100644 --- a/js/src/builtin/intl/CommonFunctions.js +++ b/js/src/builtin/intl/CommonFunctions.js @@ -446,6 +446,64 @@ function CanonicalizeLanguageTag(locale) { return canonical; } + +/** + * Returns true if the input contains only ASCII alphabetical characters. + */ +function IsASCIIAlphaString(s) { + assert(typeof s === "string", "IsASCIIAlphaString"); + + for (var i = 0; i < s.length; i++) { + var c = callFunction(std_String_charCodeAt, s, i); + if (!((0x41 <= c && c <= 0x5A) || (0x61 <= c && c <= 0x7A))) + return false + } + return true; +} + + +/** + * Validates and canonicalizes the given language tag. + */ +function ValidateAndCanonicalizeLanguageTag(locale) { + assert(typeof locale === "string", "ValidateAndCanonicalizeLanguageTag"); + + // Handle the common case (a standalone language) first. + // Only the following BCP47 subset is accepted: + // Language-Tag = langtag + // langtag = language + // language = 2*3ALPHA ; shortest ISO 639 code + // For three character long strings we need to make sure it's not a + // private use only language tag, for example "x-x". + if (locale.length === 2 || (locale.length === 3 && locale[1] !== "-")) { + if (!IsASCIIAlphaString(locale)) + ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale); + assert(IsStructurallyValidLanguageTag(locale), "2*3ALPHA is a valid language tag"); + + // The language subtag is canonicalized to lower case. + locale = callFunction(std_String_toLowerCase, locale); + + // langTagMappings doesn't contain any 2*3ALPHA keys, so we don't need + // to check for possible replacements in this map. 
+ assert(!callFunction(std_Object_hasOwnProperty, langTagMappings, locale), + "langTagMappings contains no 2*3ALPHA mappings"); + + // Replace deprecated subtags with their preferred values. + locale = callFunction(std_Object_hasOwnProperty, langSubtagMappings, locale) + ? langSubtagMappings[locale] + : locale; + assert(locale === CanonicalizeLanguageTag(locale), "expected same canonicalization"); + + return locale; + } + + if (!IsStructurallyValidLanguageTag(locale)) + ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale); + + return CanonicalizeLanguageTag(locale); +} + + function localeContainsNoUnicodeExtensions(locale) { // No "-u-", no possible Unicode extension. if (callFunction(std_String_indexOf, locale, "-u-") === -1) diff --git a/js/src/builtin/intl/make_intl_data.py b/js/src/builtin/intl/make_intl_data.py index a81001e0f3..02bf350814 100644 --- a/js/src/builtin/intl/make_intl_data.py +++ b/js/src/builtin/intl/make_intl_data.py @@ -151,6 +151,10 @@ def readRegistry(registry): # Special case for heploc. langTagMappings["ja-latn-hepburn-heploc"] = "ja-Latn-alalc97" + # ValidateAndCanonicalizeLanguageTag in Intl.js expects langTagMappings + # contains no 2*3ALPHA. + assert all(len(lang) > 3 for lang in langTagMappings.iterkeys()) + return {"fileDate": fileDate, "langTagMappings": langTagMappings, "langSubtagMappings": langSubtagMappings, diff --git a/js/src/jsapi.h b/js/src/jsapi.h index 923aa2bb05..f80d2602e6 100644 --- a/js/src/jsapi.h +++ b/js/src/jsapi.h @@ -5327,8 +5327,8 @@ JS_ResetDefaultLocale(JSContext* cx); * Locale specific string conversion and error message callbacks. 
*/ struct JSLocaleCallbacks { - JSLocaleToUpperCase localeToUpperCase; - JSLocaleToLowerCase localeToLowerCase; + JSLocaleToUpperCase localeToUpperCase; // not used + JSLocaleToLowerCase localeToLowerCase; // not used JSLocaleCompare localeCompare; // not used JSLocaleToUnicode localeToUnicode; }; diff --git a/js/src/jscntxt.h b/js/src/jscntxt.h index 1bc426e14e..c4ef783d3a 100644 --- a/js/src/jscntxt.h +++ b/js/src/jscntxt.h @@ -365,6 +365,7 @@ struct JSContext : public js::ExclusiveContext, using ExclusiveContext::permanentAtoms; using ExclusiveContext::pod_calloc; using ExclusiveContext::pod_malloc; + using ExclusiveContext::pod_realloc; using ExclusiveContext::staticStrings; using ExclusiveContext::updateMallocCounter; using ExclusiveContext::wellKnownSymbols; diff --git a/js/src/jsstr.cpp b/js/src/jsstr.cpp index 6726da9457..fdee274c32 100644 --- a/js/src/jsstr.cpp +++ b/js/src/jsstr.cpp @@ -31,10 +31,12 @@ #include "jsutil.h" #include "builtin/intl/ICUHeader.h" +#include "builtin/intl/CommonFunctions.h" #include "builtin/RegExp.h" #include "jit/InlinableNatives.h" #include "js/Conversions.h" #include "js/UniquePtr.h" +#include "unicode/uchar.h" #include "unicode/unorm2.h" #include "vm/GlobalObject.h" #include "vm/Interpreter.h" @@ -599,18 +601,209 @@ js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t l } template <typename CharT> +static auto +ReallocChars(JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t oldLength, + size_t newLength) + -> decltype(chars) +{ + using AnyCharPtr = decltype(chars); + + CharT* oldChars = chars.release(); + CharT* newChars = cx->pod_realloc<CharT>(oldChars, oldLength, newLength); + if (!newChars) { + js_free(oldChars); + return AnyCharPtr(); + } + + return AnyCharPtr(newChars); +} + +/** + * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings + * depending on its context: + * When it's preceded by a cased character and not followed by another cased + * character, its 
lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA. + * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA. + * + * Unicode 9.0, §3.13 Default Case Algorithms + */ +static char16_t +Final_Sigma(const char16_t* chars, size_t length, size_t index) +{ + MOZ_ASSERT(index < length); + MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA); + MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) == + unicode::GREEK_SMALL_LETTER_SIGMA); + + // Tell the analysis the BinaryProperty.contains function pointer called by + // u_hasBinaryProperty cannot GC. + JS::AutoSuppressGCAnalysis nogc; + + bool precededByCased = false; + for (size_t i = index; i > 0; ) { + char16_t c = chars[--i]; + uint32_t codePoint = c; + if (unicode::IsTrailSurrogate(c) && i > 0) { + char16_t lead = chars[i - 1]; + if (unicode::IsLeadSurrogate(lead)) { + codePoint = unicode::UTF16Decode(lead, c); + i--; + } + } + + // Ignore any characters with the property Case_Ignorable. + // NB: We need to skip over all Case_Ignorable characters, even when + // they also have the Cased binary property. + if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) + continue; + + precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED); + break; + } + if (!precededByCased) + return unicode::GREEK_SMALL_LETTER_SIGMA; + + bool followedByCased = false; + for (size_t i = index + 1; i < length; ) { + char16_t c = chars[i++]; + uint32_t codePoint = c; + if (unicode::IsLeadSurrogate(c) && i < length) { + char16_t trail = chars[i]; + if (unicode::IsTrailSurrogate(trail)) { + codePoint = unicode::UTF16Decode(c, trail); + i++; + } + } + + // Ignore any characters with the property Case_Ignorable. + // NB: We need to skip over all Case_Ignorable characters, even when + // they also have the Cased binary property. 
+ if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) + continue; + + followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED); + break; + } + if (!followedByCased) + return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA; + + return unicode::GREEK_SMALL_LETTER_SIGMA; +} + +static Latin1Char +Final_Sigma(const Latin1Char* chars, size_t length, size_t index) +{ + MOZ_ASSERT_UNREACHABLE("U+03A3 is not a Latin-1 character"); + return 0; +} + +// If |srcLength == destLength| is true, the destination buffer was allocated +// with the same size as the source buffer. When we append characters which +// have special casing mappings, we test |srcLength == destLength| to decide +// if we need to back out and reallocate a sufficiently large destination +// buffer. Otherwise the destination buffer was allocated with the correct +// size to hold all lower case mapped characters, i.e. +// |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true. +template <typename CharT> +static size_t +ToLowerCaseImpl(CharT* destChars, const CharT* srcChars, size_t startIndex, size_t srcLength, + size_t destLength) +{ + MOZ_ASSERT(startIndex < srcLength); + MOZ_ASSERT(srcLength <= destLength); + MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), srcLength == destLength); + + size_t j = startIndex; + for (size_t i = startIndex; i < srcLength; i++) { + char16_t c = srcChars[i]; + if (!IsSame<CharT, Latin1Char>::value) { + if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) { + char16_t trail = srcChars[i + 1]; + if (unicode::IsTrailSurrogate(trail)) { + trail = unicode::ToLowerCaseNonBMPTrail(c, trail); + destChars[j++] = c; + destChars[j++] = trail; + i++; + continue; + } + } + + // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE + // lowercases to <U+0069 U+0307>. + if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { + // Return if the output buffer is too small. 
+ if (srcLength == destLength) + return i; + + destChars[j++] = CharT('i'); + destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE); + continue; + } + + // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to + // one of two codepoints depending on context. + if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) { + destChars[j++] = Final_Sigma(srcChars, srcLength, i); + continue; + } + } + + c = unicode::ToLowerCase(c); + MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR); + destChars[j++] = c; + } + + MOZ_ASSERT(j == destLength); + destChars[destLength] = '\0'; + + return srcLength; +} + +static size_t +ToLowerCaseLength(const char16_t* chars, size_t startIndex, size_t length) +{ + size_t lowerLength = length; + for (size_t i = startIndex; i < length; i++) { + char16_t c = chars[i]; + + // U+0130 is lowercased to the two-element sequence <U+0069 U+0307>. + if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) + lowerLength += 1; + } + return lowerLength; +} + +static size_t +ToLowerCaseLength(const Latin1Char* chars, size_t startIndex, size_t length) +{ + MOZ_ASSERT_UNREACHABLE("never called for Latin-1 strings"); + return 0; +} + +template <typename CharT> static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) { - // Unlike toUpperCase, toLowerCase has the nice invariant that if the input - // is a Latin1 string, the output is also a Latin1 string. - UniquePtr<CharT[], JS::FreePolicy> newChars; - size_t length = str->length(); + // Unlike toUpperCase, toLowerCase has the nice invariant that if the + // input is a Latin-1 string, the output is also a Latin-1 string. + using AnyCharPtr = UniquePtr<CharT[], JS::FreePolicy>; + + AnyCharPtr newChars; + const size_t length = str->length(); + size_t resultLength; { AutoCheckCannotGC nogc; const CharT* chars = str->chars<CharT>(nogc); - // Look for the first upper case character. 
+ // We don't need extra special casing checks in the loop below, + // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3 + // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings. + MOZ_ASSERT(unicode::CanLowerCase(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE), + "U+0130 has a simple lower case mapping"); + MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA), + "U+03A3 has a simple lower case mapping"); + + // Look for the first character that changes when lowercased. size_t i = 0; for (; i < length; i++) { char16_t c = chars[i]; @@ -630,40 +823,35 @@ ToLowerCase(JSContext* cx, JSLinearString* str) break; } - // If all characters are lower case, return the input string. + // If no character needs to change, return the input string. if (i == length) return str; - newChars = cx->make_pod_array<CharT>(length + 1); + resultLength = length; + newChars = cx->make_pod_array<CharT>(resultLength + 1); if (!newChars) return nullptr; PodCopy(newChars.get(), chars, i); - for (; i < length; i++) { - char16_t c = chars[i]; - if (!IsSame<CharT, Latin1Char>::value) { - if (unicode::IsLeadSurrogate(c) && i + 1 < length) { - char16_t trail = chars[i + 1]; - if (unicode::IsTrailSurrogate(trail)) { - trail = unicode::ToLowerCaseNonBMPTrail(c, trail); - newChars[i] = c; - newChars[i + 1] = trail; - i++; - continue; - } - } - } + size_t readChars = ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength); + if (readChars < length) { + MOZ_ASSERT((!IsSame<CharT, Latin1Char>::value), + "Latin-1 strings don't have special lower case mappings"); + resultLength = ToLowerCaseLength(chars, readChars, length); - c = unicode::ToLowerCase(c); - MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR); - newChars[i] = c; - } + AnyCharPtr buf = ReallocChars(cx, Move(newChars), length + 1, resultLength + 1); + if (!buf) + return nullptr; - newChars[length] = 0; + newChars = Move(buf); + + MOZ_ALWAYS_TRUE(length == + 
ToLowerCaseImpl(newChars.get(), chars, readChars, length, resultLength)); + } } - JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length); + JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), resultLength); if (!res) return nullptr; @@ -671,104 +859,295 @@ ToLowerCase(JSContext* cx, JSLinearString* str) return res; } -static inline bool -ToLowerCaseHelper(JSContext* cx, const CallArgs& args) +JSString* +js::StringToLowerCase(JSContext* cx, HandleLinearString string) +{ + if (string->hasLatin1Chars()) + return ToLowerCase<Latin1Char>(cx, string); + return ToLowerCase<char16_t>(cx, string); +} + +bool +js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = CallArgsFromVp(argc, vp); + RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; - JSLinearString* linear = str->ensureLinear(cx); + RootedLinearString linear(cx, str->ensureLinear(cx)); if (!linear) return false; - if (linear->hasLatin1Chars()) - str = ToLowerCase<Latin1Char>(cx, linear); - else - str = ToLowerCase<char16_t>(cx, linear); - if (!str) + JSString* result = StringToLowerCase(cx, linear); + if (!result) return false; - args.rval().setString(str); + args.rval().setString(result); return true; } -bool -js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) +static const char* +CaseMappingLocale(JSContext* cx, JSString* str) { - return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp)); + JSLinearString* locale = str->ensureLinear(cx); + if (!locale) + return nullptr; + + MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag"); + + // Lithuanian, Turkish, and Azeri have language dependent case mappings. + static const char languagesWithSpecialCasing[][3] = { "lt", "tr", "az" }; + + // All strings in |languagesWithSpecialCasing| are of length two, so we + // only need to compare the first two characters to find a matching locale. 
+ // ES2017 Intl, §9.2.2 BestAvailableLocale + if (locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-') { + for (const auto& language : languagesWithSpecialCasing) { + if (locale->latin1OrTwoByteChar(0) == language[0] && + locale->latin1OrTwoByteChar(1) == language[1]) + { + return language; + } + } + } + + return ""; // ICU root locale } bool -js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) +js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + MOZ_ASSERT(args[0].isString()); + MOZ_ASSERT(args[1].isString()); - /* - * Forcefully ignore the first (or any) argument and return toLowerCase(), - * ECMA has reserved that argument, presumably for defining the locale. - */ - if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) { - RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); - if (!str) - return false; + RootedLinearString linear(cx, args[0].toString()->ensureLinear(cx)); + if (!linear) + return false; - RootedValue result(cx); - if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) + const char* locale = CaseMappingLocale(cx, args[1].toString()); + if (!locale) + return false; + + // Call String.prototype.toLowerCase() for language independent casing. + if (intl::StringsAreEqual(locale, "")) { + JSString* str = StringToLowerCase(cx, linear); + if (!str) return false; - args.rval().set(result); + args.rval().setString(str); return true; } - return ToLowerCaseHelper(cx, args); + AutoStableStringChars inputChars(cx); + if (!inputChars.initTwoByte(cx, linear)) + return false; + mozilla::Range<const char16_t> input = inputChars.twoByteRange(); + + // Maximum case mapping length is three characters. 
+ static_assert(JSString::MAX_LENGTH < INT32_MAX / 3, + "Case conversion doesn't overflow int32_t indices"); + + JSString* str = intl::CallICU(cx, [&input, locale](UChar* chars, int32_t size, UErrorCode* status) { + return u_strToLower(chars, size, Char16ToUChar(input.begin().get()), input.length(), + locale, status); + }); + if (!str) + return false; + + args.rval().setString(str); + return true; } -template <typename DestChar, typename SrcChar> -static void -ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length) +static inline bool +CanUpperCaseSpecialCasing(Latin1Char charCode) { - MOZ_ASSERT(firstLowerCase < length); + // Handle U+00DF LATIN SMALL LETTER SHARP S inline, all other Latin-1 + // characters don't have special casing rules. + MOZ_ASSERT_IF(charCode != unicode::LATIN_SMALL_LETTER_SHARP_S, + !unicode::CanUpperCaseSpecialCasing(charCode)); - for (size_t i = 0; i < firstLowerCase; i++) - destChars[i] = srcChars[i]; + return charCode == unicode::LATIN_SMALL_LETTER_SHARP_S; +} + +static inline bool +CanUpperCaseSpecialCasing(char16_t charCode) +{ + return unicode::CanUpperCaseSpecialCasing(charCode); +} + +static inline size_t +LengthUpperCaseSpecialCasing(Latin1Char charCode) +{ + // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'. + MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S); + + return 2; +} + +static inline size_t +LengthUpperCaseSpecialCasing(char16_t charCode) +{ + MOZ_ASSERT(CanUpperCaseSpecialCasing(charCode)); + + return unicode::LengthUpperCaseSpecialCasing(charCode); +} + +static inline void +AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index) +{ + // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'. 
+ MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S); + static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character"); + + elements[(*index)++] = 'S'; + elements[(*index)++] = 'S'; +} - for (size_t i = firstLowerCase; i < length; i++) { +static inline void +AppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index) +{ + unicode::AppendUpperCaseSpecialCasing(charCode, elements, index); +} + +// See ToLowerCaseImpl for an explanation of the parameters. +template <typename DestChar, typename SrcChar> +static size_t +ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t startIndex, size_t srcLength, + size_t destLength) +{ + static_assert(IsSame<SrcChar, Latin1Char>::value || !IsSame<DestChar, Latin1Char>::value, + "cannot write non-Latin-1 characters into Latin-1 string"); + MOZ_ASSERT(startIndex < srcLength); + MOZ_ASSERT(srcLength <= destLength); + + size_t j = startIndex; + for (size_t i = startIndex; i < srcLength; i++) { char16_t c = srcChars[i]; if (!IsSame<DestChar, Latin1Char>::value) { - if (unicode::IsLeadSurrogate(c) && i + 1 < length) { + if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) { char16_t trail = srcChars[i + 1]; if (unicode::IsTrailSurrogate(trail)) { trail = unicode::ToUpperCaseNonBMPTrail(c, trail); - destChars[i] = c; - destChars[i + 1] = trail; + destChars[j++] = c; + destChars[j++] = trail; i++; continue; } } } + + if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast<SrcChar>(c)))) { + // Return if the output buffer is too small. + if (srcLength == destLength) + return i; + + AppendUpperCaseSpecialCasing(c, destChars, &j); + continue; + } + c = unicode::ToUpperCase(c); MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR); - destChars[i] = c; + destChars[j++] = c; + } + + MOZ_ASSERT(j == destLength); + destChars[destLength] = '\0'; + + return srcLength; +} + +// Explicit instantiation so we don't hit the static_assert from above. 
+static bool +ToUpperCaseImpl(Latin1Char* destChars, const char16_t* srcChars, size_t startIndex, + size_t srcLength, size_t destLength) +{ + MOZ_ASSERT_UNREACHABLE("cannot write non-Latin-1 characters into Latin-1 string"); + return false; +} + +template <typename CharT> +static size_t +ToUpperCaseLength(const CharT* chars, size_t startIndex, size_t length) +{ + size_t upperLength = length; + for (size_t i = startIndex; i < length; i++) { + char16_t c = chars[i]; + + if (c > 0x7f && CanUpperCaseSpecialCasing(static_cast<CharT>(c))) + upperLength += LengthUpperCaseSpecialCasing(static_cast<CharT>(c)) - 1; + } + return upperLength; +} + +template <typename DestChar, typename SrcChar> +static inline void +CopyChars(DestChar* destChars, const SrcChar* srcChars, size_t length) +{ + static_assert(!IsSame<DestChar, SrcChar>::value, "PodCopy is used for the same type case"); + for (size_t i = 0; i < length; i++) + destChars[i] = srcChars[i]; +} + +template <typename CharT> +static inline void +CopyChars(CharT* destChars, const CharT* srcChars, size_t length) +{ + PodCopy(destChars, srcChars, length); +} + +template <typename DestChar, typename SrcChar> +static inline UniquePtr<DestChar[], JS::FreePolicy> +ToUpperCase(JSContext* cx, const SrcChar* chars, size_t startIndex, size_t length, + size_t* resultLength) +{ + MOZ_ASSERT(startIndex < length); + + using DestCharPtr = UniquePtr<DestChar[], JS::FreePolicy>; + + *resultLength = length; + DestCharPtr buf = cx->make_pod_array<DestChar>(length + 1); + if (!buf) + return buf; + + CopyChars(buf.get(), chars, startIndex); + + size_t readChars = ToUpperCaseImpl(buf.get(), chars, startIndex, length, length); + if (readChars < length) { + size_t actualLength = ToUpperCaseLength(chars, readChars, length); + + *resultLength = actualLength; + DestCharPtr buf2 = ReallocChars(cx, Move(buf), length + 1, actualLength + 1); + if (!buf2) + return buf2; + + buf = Move(buf2); + + MOZ_ALWAYS_TRUE(length == + ToUpperCaseImpl(buf.get(), chars, 
readChars, length, actualLength)); } - destChars[length] = '\0'; + return buf; } template <typename CharT> static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) { - typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr; - typedef UniquePtr<char16_t[], JS::FreePolicy> TwoByteCharPtr; + using Latin1CharPtr = UniquePtr<Latin1Char[], JS::FreePolicy>; + using TwoByteCharPtr = UniquePtr<char16_t[], JS::FreePolicy>; mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars; - size_t length = str->length(); + const size_t length = str->length(); + size_t resultLength; { AutoCheckCannotGC nogc; const CharT* chars = str->chars<CharT>(nogc); - // Look for the first lower case character. + // Look for the first character that changes when uppercased. size_t i = 0; for (; i < length; i++) { char16_t c = chars[i]; @@ -786,21 +1165,33 @@ ToUpperCase(JSContext* cx, JSLinearString* str) } if (unicode::CanUpperCase(c)) break; + if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast<CharT>(c)))) + break; } - // If all characters are upper case, return the input string. + // If no character needs to change, return the input string. if (i == length) return str; - // If the string is Latin1, check if it contains the MICRO SIGN (0xb5) - // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding - // upper case characters are not in the Latin1 range. + // The string changes when uppercased, so we must create a new string. + // Can it be Latin-1? + // + // If the original string is Latin-1, it can -- unless the string + // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS, + // the only Latin-1 codepoints that don't uppercase within Latin-1. + // Search for those codepoints to decide whether the new string can be + // Latin-1. + // If the original string is a two-byte string, its uppercase form is + // so rarely Latin-1 that we don't even consider creating a new + // Latin-1 string. 
bool resultIsLatin1; if (IsSame<CharT, Latin1Char>::value) { resultIsLatin1 = true; for (size_t j = i; j < length; j++) { Latin1Char c = chars[j]; - if (c == 0xb5 || c == 0xff) { + if (c == unicode::MICRO_SIGN || + c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) + { MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR); resultIsLatin1 = false; break; @@ -813,31 +1204,29 @@ ToUpperCase(JSContext* cx, JSLinearString* str) } if (resultIsLatin1) { - Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1); + Latin1CharPtr buf = ToUpperCase<Latin1Char>(cx, chars, i, length, &resultLength); if (!buf) return nullptr; - ToUpperCaseImpl(buf.get(), chars, i, length); newChars.construct<Latin1CharPtr>(Move(buf)); } else { - TwoByteCharPtr buf = cx->make_pod_array<char16_t>(length + 1); + TwoByteCharPtr buf = ToUpperCase<char16_t>(cx, chars, i, length, &resultLength); if (!buf) return nullptr; - ToUpperCaseImpl(buf.get(), chars, i, length); newChars.construct<TwoByteCharPtr>(Move(buf)); } } JSString* res; if (newChars.constructed<Latin1CharPtr>()) { - res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length); + res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), resultLength); if (!res) return nullptr; mozilla::Unused << newChars.ref<Latin1CharPtr>().release(); } else { - res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length); + res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), resultLength); if (!res) return nullptr; @@ -847,57 +1236,79 @@ ToUpperCase(JSContext* cx, JSLinearString* str) return res; } -static bool -ToUpperCaseHelper(JSContext* cx, const CallArgs& args) +JSString* +js::StringToUpperCase(JSContext* cx, HandleLinearString string) +{ + if (string->hasLatin1Chars()) + return ToUpperCase<Latin1Char>(cx, string); + return ToUpperCase<char16_t>(cx, string); +} + +bool +js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) { + CallArgs args = 
CallArgsFromVp(argc, vp); + RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); if (!str) return false; - JSLinearString* linear = str->ensureLinear(cx); + RootedLinearString linear(cx, str->ensureLinear(cx)); if (!linear) return false; - if (linear->hasLatin1Chars()) - str = ToUpperCase<Latin1Char>(cx, linear); - else - str = ToUpperCase<char16_t>(cx, linear); - if (!str) + JSString* result = StringToUpperCase(cx, linear); + if (!result) return false; - args.rval().setString(str); + args.rval().setString(result); return true; } bool -js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) -{ - return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp)); -} - -bool -js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) +js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); + MOZ_ASSERT(args.length() == 2); + MOZ_ASSERT(args[0].isString()); + MOZ_ASSERT(args[1].isString()); - /* - * Forcefully ignore the first (or any) argument and return toUpperCase(), - * ECMA has reserved that argument, presumably for defining the locale. - */ - if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) { - RootedString str(cx, ToStringForStringFunction(cx, args.thisv())); - if (!str) - return false; + RootedLinearString linear(cx, args[0].toString()->ensureLinear(cx)); + if (!linear) + return false; + + const char* locale = CaseMappingLocale(cx, args[1].toString()); + if (!locale) + return false; - RootedValue result(cx); - if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) + // Call String.prototype.toUpperCase() for language independent casing. 
+ if (intl::StringsAreEqual(locale, "")) { + JSString* str = StringToUpperCase(cx, linear); + if (!str) return false; - args.rval().set(result); + args.rval().setString(str); return true; } - return ToUpperCaseHelper(cx, args); + AutoStableStringChars inputChars(cx); + if (!inputChars.initTwoByte(cx, linear)) + return false; + mozilla::Range<const char16_t> input = inputChars.twoByteRange(); + + // Maximum case mapping length is three characters. + static_assert(JSString::MAX_LENGTH < INT32_MAX / 3, + "Case conversion doesn't overflow int32_t indices"); + + JSString* str = intl::CallICU(cx, [&input, locale](UChar* chars, int32_t size, UErrorCode* status) { + return u_strToUpper(chars, size, Char16ToUChar(input.begin().get()), input.length(), + locale, status); + }); + if (!str) + return false; + + args.rval().setString(str); + return true; } /* ES2017 21.1.3.12. */ @@ -944,7 +1355,7 @@ js::str_normalize(JSContext* cx, unsigned argc, Value* vp) if (!linear) return false; - // Latin1 strings are already in Normalization Form C. + // Latin-1 strings are already in Normalization Form C. if (form == NFC && linear->hasLatin1Chars()) { // Step 7. args.rval().setString(str); @@ -1359,7 +1770,7 @@ StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t /* * For big patterns with large potential overlap we want the SIMD-optimized * speed of memcmp. For small patterns, a simple loop is faster. We also can't - * use memcmp if one of the strings is TwoByte and the other is Latin1. + * use memcmp if one of the strings is TwoByte and the other is Latin-1. * * FIXME: Linux memcmp performance is sad and the manual loop is faster. */ @@ -1555,7 +1966,7 @@ RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match) * need to build the list of leaf nodes. Do both here: iterate over the * nodes so long as there are not too many. 
* - * We also don't use rope matching if the rope contains both Latin1 and + * We also don't use rope matching if the rope contains both Latin-1 and * TwoByte nodes, to simplify the match algorithm. */ { @@ -2890,8 +3301,8 @@ static const JSFunctionSpec string_methods[] = { JS_FN("trimStart", str_trimStart, 0,0), JS_FN("trimRight", str_trimEnd, 0,0), JS_FN("trimEnd", str_trimEnd, 0,0), - JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,0), - JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,0), + JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_toLocaleLowerCase", 0,0), + JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_toLocaleUpperCase", 0,0), JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1,0), JS_SELF_HOSTED_FN("repeat", "String_repeat", 1,0), JS_FN("normalize", str_normalize, 0,0), @@ -3000,7 +3411,7 @@ js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) // string (thin or fat) and so we don't need to malloc the chars. (We could // cover some cases where args.length() goes up to // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are - // all Latin1, but it doesn't seem worth the effort.) + // all Latin-1, but it doesn't seem worth the effort.) if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE) return str_fromCharCode_few_args(cx, args); @@ -3143,7 +3554,7 @@ js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) // string (thin or fat) and so we don't need to malloc the chars. (We could // cover some cases where |args.length()| goes up to // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars - // are all Latin1, but it doesn't seem worth the effort.) + // are all Latin-1, but it doesn't seem worth the effort.) 
if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2) return str_fromCodePoint_few_args(cx, args); diff --git a/js/src/jsstr.h b/js/src/jsstr.h index 0e31276a86..cd2be4e59b 100644 --- a/js/src/jsstr.h +++ b/js/src/jsstr.h @@ -371,11 +371,24 @@ str_trimStart(JSContext* cx, unsigned argc, Value* vp); extern bool str_trimEnd(JSContext* cx, unsigned argc, Value* vp); -extern bool -str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp); +/** + * Returns the input string converted to lower case based on the language + * specific case mappings for the input locale. + * + * Usage: lowerCase = intl_toLocaleLowerCase(string, locale) + */ +extern MOZ_MUST_USE bool +intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns the input string converted to upper case based on the language + * specific case mappings for the input locale. + * + * Usage: upperCase = intl_toLocaleUpperCase(string, locale) + */ +extern MOZ_MUST_USE bool +intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp); -extern bool -str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp); extern bool str_normalize(JSContext* cx, unsigned argc, Value* vp); @@ -480,6 +493,12 @@ JSString* str_replaceAll_string_raw(JSContext* cx, HandleString string, HandleString pattern, HandleString replacement); +extern JSString* +StringToLowerCase(JSContext* cx, HandleLinearString string); + +extern JSString* +StringToUpperCase(JSContext* cx, HandleLinearString string); + extern bool StringConstructor(JSContext* cx, unsigned argc, Value* vp); diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp index bc66d6aa1e..0717bfd490 100644 --- a/js/src/vm/SelfHosting.cpp +++ b/js/src/vm/SelfHosting.cpp @@ -2207,11 +2207,9 @@ static const JSFunctionSpec intrinsic_functions[] = { JS_FN("std_String_trimStart", str_trimStart, 0,0), JS_FN("std_String_trimRight", str_trimEnd, 0,0), JS_FN("std_String_trimEnd", str_trimEnd, 0,0), - JS_FN("std_String_toLocaleLowerCase", 
str_toLocaleLowerCase, 0,0), - JS_FN("std_String_toLocaleUpperCase", str_toLocaleUpperCase, 0,0), JS_FN("std_String_normalize", str_normalize, 0,0), JS_FN("std_String_concat", str_concat, 1,0), - + JS_FN("std_TypedArray_buffer", js::TypedArray_bufferGetter, 1,0), JS_FN("std_WeakMap_has", WeakMap_has, 1,0), @@ -2485,6 +2483,8 @@ static const JSFunctionSpec intrinsic_functions[] = { JS_FN("intl_PluralRules_availableLocales", intl_PluralRules_availableLocales, 0,0), JS_FN("intl_GetPluralCategories", intl_GetPluralCategories, 2, 0), JS_FN("intl_SelectPluralRule", intl_SelectPluralRule, 2,0), + JS_FN("intl_toLocaleLowerCase", intl_toLocaleLowerCase, 2,0), + JS_FN("intl_toLocaleUpperCase", intl_toLocaleUpperCase, 2,0), JS_FN("intl_RelativeTimeFormat_availableLocales", intl_RelativeTimeFormat_availableLocales, 0,0), JS_FN("intl_FormatRelativeTime", intl_FormatRelativeTime, 3,0), diff --git a/js/src/vm/SpecialCasing.txt b/js/src/vm/SpecialCasing.txt new file mode 100644 index 0000000000..c90d09acb3 --- /dev/null +++ b/js/src/vm/SpecialCasing.txt @@ -0,0 +1,281 @@ +# SpecialCasing-11.0.0.txt +# Date: 2018-02-22, 06:16:47 GMT +# © 2018 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Special Casing +# +# This file is a supplement to the UnicodeData.txt file. It does not define any +# properties, but rather provides additional information about the casing of +# Unicode characters, for situations when casing incurs a change in string length +# or is dependent on context or locale. For compatibility, the UnicodeData.txt +# file only contains simple case mappings for characters where they are one-to-one +# and independent of context and language. 
The data in this file, combined with +# the simple case mappings in UnicodeData.txt, defines the full case mappings +# Lowercase_Mapping (lc), Titlecase_Mapping (tc), and Uppercase_Mapping (uc). +# +# Note that the preferred mechanism for defining tailored casing operations is +# the Unicode Common Locale Data Repository (CLDR). For more information, see the +# discussion of case mappings and case algorithms in the Unicode Standard. +# +# All code points not listed in this file that do not have a simple case mappings +# in UnicodeData.txt map to themselves. +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# <code>; <lower>; <title>; <upper>; (<condition_list>;)? # <comment> +# +# <code>, <lower>, <title>, and <upper> provide the respective full case mappings +# of <code>, expressed as character values in hex. If there is more than one character, +# they are separated by spaces. Other than as used to separate elements, spaces are +# to be ignored. +# +# The <condition_list> is optional. Where present, it consists of one or more language IDs +# or casing contexts, separated by spaces. In these conditions: +# - A condition list overrides the normal behavior if all of the listed conditions are true. +# - The casing context is always the context of the characters in the original string, +# NOT in the resulting string. +# - Case distinctions in the condition list are not significant. +# - Conditions preceded by "Not_" represent the negation of the condition. +# The condition list is not represented in the UCD as a formal property. +# +# A language ID is defined by BCP 47, with '-' and '_' treated equivalently. +# +# A casing context for a character is defined by Section 3.13 Default Case Algorithms +# of The Unicode Standard. 
+# +# Parsers of this file must be prepared to deal with future additions to this format: +# * Additional contexts +# * Additional fields +# ================================================================================ + +# ================================================================================ +# Unconditional mappings +# ================================================================================ + +# The German es-zed is special--the normal mapping is to SS. +# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>)) + +00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S + +# Preserve canonical equivalence for I with dot. Turkic is handled below. + +0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE + +# Ligatures + +FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF +FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI +FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL +FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI +FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL +FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T +FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST + +0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN +FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW +FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH +FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI +FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW +FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH + +# No corresponding uppercase precomposed character + +0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH 
DIALYTIKA AND TONOS +01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON +1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW +1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS +1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE +1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING +1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI +1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI +1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI +1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI +1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI +1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI + +# IMPORTANT-when iota-subscript (0345) is uppercased or titlecased, +# the result will be incorrect unless 
the iota-subscript is moved to the end +# of any sequence of combining marks. Otherwise, the accents will go on the capital iota. +# This process can be achieved by first transforming the text to NFC before casing. +# E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA> + +# The following cases are already in the UnicodeData.txt file, so are only commented here. + +# 0345; 0345; 0399; 0399; # COMBINING GREEK YPOGEGRAMMENI + +# All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript) +# have special uppercases. +# Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase! + +1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; 1F86; 1F8E; 1F0E 
0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK 
SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI +1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + +# Some characters with YPOGEGRAMMENI also have no corresponding titlecases + +1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK 
SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI + +1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI + +# ================================================================================ +# Conditional Mappings +# The remainder of this file provides conditional casing data used to produce +# full case mappings. +# ================================================================================ +# Language-Insensitive Mappings +# These are characters whose full case mappings do not depend on language, but do +# depend on context (which characters come before or after). For more information +# see the header of this file and the Unicode Standard. +# ================================================================================ + +# Special case for final form of sigma + +03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA + +# Note: the following cases for non-final are already in the UnicodeData.txt file. 
+ +# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA +# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA +# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA + +# Note: the following cases are not included, since they would case-fold in lowercasing + +# 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA +# 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA + +# ================================================================================ +# Language-Sensitive Mappings +# These are characters whose full case mappings depend on language and perhaps also +# context (which characters come before or after). For more information +# see the header of this file and the Unicode Standard. +# ================================================================================ + +# Lithuanian + +# Lithuanian retains the dot in a lowercase i when followed by accents. + +# Remove DOT ABOVE after "i" with upper or titlecase + +0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE + +# Introduce an explicit dot above when lowercasing capital I's and J's +# whenever there are more accents above. +# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) + +0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I +004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J +012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK +00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE +00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE +0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE + +# ================================================================================ + +# Turkish and Azeri + +# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri +# The following rules handle those cases. 
+ +0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE + +# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. +# This matches the behavior of the canonically equivalent I-dot_above + +0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE +0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE + +# When lowercasing, unless an I is before a dot_above, it turns into a dotless i. + +0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I +0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I + +# When uppercasing, i turns into a dotted capital I + +0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I +0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I + +# Note: the following case is already in the UnicodeData.txt file. + +# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I + +# EOF + diff --git a/js/src/vm/Unicode.cpp b/js/src/vm/Unicode.cpp index bc4566c825..0236f7c649 100644 --- a/js/src/vm/Unicode.cpp +++ b/js/src/vm/Unicode.cpp @@ -55,176 +55,176 @@ using namespace js::unicode; * stop if you found the best shift */ const CharacterInfo unicode::js_charinfo[] = { - {0, 0, 0}, - {0, 0, 1}, - {0, 0, 4}, - {0, 32, 2}, - {65504, 0, 2}, - {0, 0, 2}, - {743, 0, 2}, - {121, 0, 2}, - {0, 1, 2}, - {65535, 0, 2}, - {0, 65337, 2}, - {65304, 0, 2}, - {0, 65415, 2}, - {65236, 0, 2}, - {195, 0, 2}, - {0, 210, 2}, - {0, 206, 2}, - {0, 205, 2}, - {0, 79, 2}, - {0, 202, 2}, - {0, 203, 2}, - {0, 207, 2}, - {97, 0, 2}, - {0, 211, 2}, - {0, 209, 2}, - {163, 0, 2}, - {0, 213, 2}, - {130, 0, 2}, - {0, 214, 2}, - {0, 218, 2}, - {0, 217, 2}, - {0, 219, 2}, - {56, 0, 2}, - {0, 2, 2}, - {65535, 1, 2}, - {65534, 0, 2}, - {65457, 0, 2}, - {0, 65439, 2}, - {0, 65480, 2}, - {0, 65406, 2}, - {0, 10795, 2}, - {0, 65373, 2}, - {0, 10792, 2}, - {10815, 0, 2}, - {0, 65341, 2}, - {0, 69, 2}, - {0, 71, 2}, - {10783, 0, 2}, - {10780, 0, 2}, - 
{10782, 0, 2}, - {65326, 0, 2}, - {65330, 0, 2}, - {65331, 0, 2}, - {65334, 0, 2}, - {65333, 0, 2}, - {42319, 0, 2}, - {42315, 0, 2}, - {65329, 0, 2}, - {42280, 0, 2}, - {42308, 0, 2}, - {65327, 0, 2}, - {65325, 0, 2}, - {10743, 0, 2}, - {42305, 0, 2}, - {10749, 0, 2}, - {65323, 0, 2}, - {65322, 0, 2}, - {10727, 0, 2}, - {65318, 0, 2}, - {42282, 0, 2}, - {65467, 0, 2}, - {65319, 0, 2}, - {65465, 0, 2}, - {65317, 0, 2}, - {42261, 0, 2}, - {42258, 0, 2}, - {84, 0, 4}, - {0, 116, 2}, - {0, 38, 2}, - {0, 37, 2}, - {0, 64, 2}, - {0, 63, 2}, - {65498, 0, 2}, - {65499, 0, 2}, - {65505, 0, 2}, - {65472, 0, 2}, - {65473, 0, 2}, - {0, 8, 2}, - {65474, 0, 2}, - {65479, 0, 2}, - {65489, 0, 2}, - {65482, 0, 2}, - {65528, 0, 2}, - {65450, 0, 2}, - {65456, 0, 2}, - {7, 0, 2}, - {65420, 0, 2}, - {0, 65476, 2}, - {65440, 0, 2}, - {0, 65529, 2}, - {0, 80, 2}, - {0, 15, 2}, - {65521, 0, 2}, - {0, 48, 2}, - {65488, 0, 2}, - {0, 7264, 2}, - {3008, 0, 2}, - {0, 38864, 2}, - {59282, 0, 2}, - {59283, 0, 2}, - {59292, 0, 2}, - {59294, 0, 2}, - {59293, 0, 2}, - {59300, 0, 2}, - {59355, 0, 2}, - {35266, 0, 2}, - {0, 62528, 2}, - {35332, 0, 2}, - {3814, 0, 2}, - {65477, 0, 2}, - {0, 57921, 2}, - {8, 0, 2}, - {0, 65528, 2}, - {74, 0, 2}, - {86, 0, 2}, - {100, 0, 2}, - {128, 0, 2}, - {112, 0, 2}, - {126, 0, 2}, - {9, 0, 2}, - {0, 65462, 2}, - {0, 65527, 2}, - {58331, 0, 2}, - {0, 65450, 2}, - {0, 65436, 2}, - {0, 65424, 2}, - {0, 65408, 2}, - {0, 65410, 2}, - {0, 58019, 2}, - {0, 57153, 2}, - {0, 57274, 2}, - {0, 28, 2}, - {65508, 0, 2}, - {0, 16, 2}, - {65520, 0, 2}, - {0, 26, 0}, - {65510, 0, 0}, - {0, 54793, 2}, - {0, 61722, 2}, - {0, 54809, 2}, - {54741, 0, 2}, - {54744, 0, 2}, - {0, 54756, 2}, - {0, 54787, 2}, - {0, 54753, 2}, - {0, 54754, 2}, - {0, 54721, 2}, - {58272, 0, 2}, - {0, 30204, 2}, - {0, 23256, 2}, - {0, 23228, 2}, - {0, 23217, 2}, - {0, 23221, 2}, - {0, 23231, 2}, - {0, 23278, 2}, - {0, 23254, 2}, - {0, 23275, 2}, - {0, 928, 2}, - {64608, 0, 2}, - {26672, 0, 2}, + { 0, 0, 0 }, 
+ { 0, 0, 1 }, + { 0, 0, 4 }, + { 0, 32, 2 }, + { 65504, 0, 2 }, + { 0, 0, 2 }, + { 743, 0, 2 }, + { 121, 0, 2 }, + { 0, 1, 2 }, + { 65535, 0, 2 }, + { 0, 65337, 2 }, + { 65304, 0, 2 }, + { 0, 65415, 2 }, + { 65236, 0, 2 }, + { 195, 0, 2 }, + { 0, 210, 2 }, + { 0, 206, 2 }, + { 0, 205, 2 }, + { 0, 79, 2 }, + { 0, 202, 2 }, + { 0, 203, 2 }, + { 0, 207, 2 }, + { 97, 0, 2 }, + { 0, 211, 2 }, + { 0, 209, 2 }, + { 163, 0, 2 }, + { 0, 213, 2 }, + { 130, 0, 2 }, + { 0, 214, 2 }, + { 0, 218, 2 }, + { 0, 217, 2 }, + { 0, 219, 2 }, + { 56, 0, 2 }, + { 0, 2, 2 }, + { 65535, 1, 2 }, + { 65534, 0, 2 }, + { 65457, 0, 2 }, + { 0, 65439, 2 }, + { 0, 65480, 2 }, + { 0, 65406, 2 }, + { 0, 10795, 2 }, + { 0, 65373, 2 }, + { 0, 10792, 2 }, + { 10815, 0, 2 }, + { 0, 65341, 2 }, + { 0, 69, 2 }, + { 0, 71, 2 }, + { 10783, 0, 2 }, + { 10780, 0, 2 }, + { 10782, 0, 2 }, + { 65326, 0, 2 }, + { 65330, 0, 2 }, + { 65331, 0, 2 }, + { 65334, 0, 2 }, + { 65333, 0, 2 }, + { 42319, 0, 2 }, + { 42315, 0, 2 }, + { 65329, 0, 2 }, + { 42280, 0, 2 }, + { 42308, 0, 2 }, + { 65327, 0, 2 }, + { 65325, 0, 2 }, + { 10743, 0, 2 }, + { 42305, 0, 2 }, + { 10749, 0, 2 }, + { 65323, 0, 2 }, + { 65322, 0, 2 }, + { 10727, 0, 2 }, + { 65318, 0, 2 }, + { 42282, 0, 2 }, + { 65467, 0, 2 }, + { 65319, 0, 2 }, + { 65465, 0, 2 }, + { 65317, 0, 2 }, + { 42261, 0, 2 }, + { 42258, 0, 2 }, + { 84, 0, 4 }, + { 0, 116, 2 }, + { 0, 38, 2 }, + { 0, 37, 2 }, + { 0, 64, 2 }, + { 0, 63, 2 }, + { 65498, 0, 2 }, + { 65499, 0, 2 }, + { 65505, 0, 2 }, + { 65472, 0, 2 }, + { 65473, 0, 2 }, + { 0, 8, 2 }, + { 65474, 0, 2 }, + { 65479, 0, 2 }, + { 65489, 0, 2 }, + { 65482, 0, 2 }, + { 65528, 0, 2 }, + { 65450, 0, 2 }, + { 65456, 0, 2 }, + { 7, 0, 2 }, + { 65420, 0, 2 }, + { 0, 65476, 2 }, + { 65440, 0, 2 }, + { 0, 65529, 2 }, + { 0, 80, 2 }, + { 0, 15, 2 }, + { 65521, 0, 2 }, + { 0, 48, 2 }, + { 65488, 0, 2 }, + { 0, 7264, 2 }, + { 3008, 0, 2 }, + { 0, 38864, 2 }, + { 59282, 0, 2 }, + { 59283, 0, 2 }, + { 59292, 0, 2 }, + { 59294, 0, 2 }, 
+ { 59293, 0, 2 }, + { 59300, 0, 2 }, + { 59355, 0, 2 }, + { 35266, 0, 2 }, + { 0, 62528, 2 }, + { 35332, 0, 2 }, + { 3814, 0, 2 }, + { 65477, 0, 2 }, + { 0, 57921, 2 }, + { 8, 0, 2 }, + { 0, 65528, 2 }, + { 74, 0, 2 }, + { 86, 0, 2 }, + { 100, 0, 2 }, + { 128, 0, 2 }, + { 112, 0, 2 }, + { 126, 0, 2 }, + { 9, 0, 2 }, + { 0, 65462, 2 }, + { 0, 65527, 2 }, + { 58331, 0, 2 }, + { 0, 65450, 2 }, + { 0, 65436, 2 }, + { 0, 65424, 2 }, + { 0, 65408, 2 }, + { 0, 65410, 2 }, + { 0, 58019, 2 }, + { 0, 57153, 2 }, + { 0, 57274, 2 }, + { 0, 28, 2 }, + { 65508, 0, 2 }, + { 0, 16, 2 }, + { 65520, 0, 2 }, + { 0, 26, 0 }, + { 65510, 0, 0 }, + { 0, 54793, 2 }, + { 0, 61722, 2 }, + { 0, 54809, 2 }, + { 54741, 0, 2 }, + { 54744, 0, 2 }, + { 0, 54756, 2 }, + { 0, 54787, 2 }, + { 0, 54753, 2 }, + { 0, 54754, 2 }, + { 0, 54721, 2 }, + { 58272, 0, 2 }, + { 0, 30204, 2 }, + { 0, 23256, 2 }, + { 0, 23228, 2 }, + { 0, 23217, 2 }, + { 0, 23221, 2 }, + { 0, 23231, 2 }, + { 0, 23278, 2 }, + { 0, 23254, 2 }, + { 0, 23275, 2 }, + { 0, 928, 2 }, + { 64608, 0, 2 }, + { 26672, 0, 2 }, }; const uint8_t unicode::index1[] = { @@ -928,141 +928,141 @@ const uint8_t unicode::index2[] = { }; const CodepointsWithSameUpperCaseInfo unicode::js_codepoints_with_same_upper_info[] = { - {0, 0, 0}, - {32, 0, 0}, - {32, 232, 0}, - {32, 300, 0}, - {0, 200, 0}, - {0, 268, 0}, - {0, 775, 0}, - {1, 0, 0}, - {65336, 0, 0}, - {65415, 0, 0}, - {65268, 0, 0}, - {210, 0, 0}, - {206, 0, 0}, - {205, 0, 0}, - {79, 0, 0}, - {202, 0, 0}, - {203, 0, 0}, - {207, 0, 0}, - {211, 0, 0}, - {209, 0, 0}, - {213, 0, 0}, - {214, 0, 0}, - {218, 0, 0}, - {217, 0, 0}, - {219, 0, 0}, - {1, 2, 0}, - {0, 1, 0}, - {65535, 0, 0}, - {65439, 0, 0}, - {65480, 0, 0}, - {65406, 0, 0}, - {10795, 0, 0}, - {65373, 0, 0}, - {10792, 0, 0}, - {65341, 0, 0}, - {69, 0, 0}, - {71, 0, 0}, - {0, 116, 7289}, - {116, 0, 0}, - {38, 0, 0}, - {37, 0, 0}, - {64, 0, 0}, - {63, 0, 0}, - {32, 62, 0}, - {32, 96, 0}, - {32, 57, 0}, - {65452, 32, 7205}, - {32, 86, 0}, - 
{64793, 32, 0}, - {32, 54, 0}, - {32, 80, 0}, - {31, 32, 0}, - {32, 47, 0}, - {0, 30, 0}, - {0, 64, 0}, - {0, 25, 0}, - {65420, 0, 7173}, - {0, 54, 0}, - {64761, 0, 0}, - {0, 22, 0}, - {0, 48, 0}, - {0, 15, 0}, - {8, 0, 0}, - {65506, 0, 0}, - {65511, 0, 0}, - {65521, 0, 0}, - {65514, 0, 0}, - {65482, 0, 0}, - {65488, 0, 0}, - {65472, 0, 0}, - {65529, 0, 0}, - {80, 0, 0}, - {32, 6254, 0}, - {32, 6253, 0}, - {32, 6244, 0}, - {32, 6242, 0}, - {32, 6242, 6243}, - {32, 6236, 0}, - {0, 6222, 0}, - {0, 6221, 0}, - {0, 6212, 0}, - {0, 6210, 0}, - {0, 6210, 6211}, - {0, 6204, 0}, - {1, 6181, 0}, - {0, 6180, 0}, - {15, 0, 0}, - {48, 0, 0}, - {7264, 0, 0}, - {38864, 0, 0}, - {59314, 0, 0}, - {59315, 0, 0}, - {59324, 0, 0}, - {59326, 0, 0}, - {59326, 0, 1}, - {59325, 65535, 0}, - {59332, 0, 0}, - {59356, 0, 0}, - {0, 35267, 0}, - {62528, 0, 0}, - {1, 59, 0}, - {0, 58, 0}, - {65478, 0, 0}, - {65528, 0, 0}, - {65462, 0, 0}, - {65527, 0, 0}, - {58247, 58363, 0}, - {65450, 0, 0}, - {65436, 0, 0}, - {65424, 0, 0}, - {65408, 0, 0}, - {65410, 0, 0}, - {28, 0, 0}, - {16, 0, 0}, - {26, 0, 0}, - {54793, 0, 0}, - {61722, 0, 0}, - {54809, 0, 0}, - {54756, 0, 0}, - {54787, 0, 0}, - {54753, 0, 0}, - {54754, 0, 0}, - {54721, 0, 0}, - {30270, 1, 0}, - {30269, 0, 0}, - {30204, 0, 0}, - {23256, 0, 0}, - {23228, 0, 0}, - {23217, 0, 0}, - {23221, 0, 0}, - {23231, 0, 0}, - {23278, 0, 0}, - {23254, 0, 0}, - {23275, 0, 0}, - {928, 0, 0}, + { 0, 0, 0 }, + { 32, 0, 0 }, + { 32, 232, 0 }, + { 32, 300, 0 }, + { 0, 200, 0 }, + { 0, 268, 0 }, + { 0, 775, 0 }, + { 1, 0, 0 }, + { 65336, 0, 0 }, + { 65415, 0, 0 }, + { 65268, 0, 0 }, + { 210, 0, 0 }, + { 206, 0, 0 }, + { 205, 0, 0 }, + { 79, 0, 0 }, + { 202, 0, 0 }, + { 203, 0, 0 }, + { 207, 0, 0 }, + { 211, 0, 0 }, + { 209, 0, 0 }, + { 213, 0, 0 }, + { 214, 0, 0 }, + { 218, 0, 0 }, + { 217, 0, 0 }, + { 219, 0, 0 }, + { 1, 2, 0 }, + { 0, 1, 0 }, + { 65535, 0, 0 }, + { 65439, 0, 0 }, + { 65480, 0, 0 }, + { 65406, 0, 0 }, + { 10795, 0, 0 }, + { 65373, 0, 0 }, + 
{ 10792, 0, 0 }, + { 65341, 0, 0 }, + { 69, 0, 0 }, + { 71, 0, 0 }, + { 0, 116, 7289 }, + { 116, 0, 0 }, + { 38, 0, 0 }, + { 37, 0, 0 }, + { 64, 0, 0 }, + { 63, 0, 0 }, + { 32, 62, 0 }, + { 32, 96, 0 }, + { 32, 57, 0 }, + { 65452, 32, 7205 }, + { 32, 86, 0 }, + { 64793, 32, 0 }, + { 32, 54, 0 }, + { 32, 80, 0 }, + { 31, 32, 0 }, + { 32, 47, 0 }, + { 0, 30, 0 }, + { 0, 64, 0 }, + { 0, 25, 0 }, + { 65420, 0, 7173 }, + { 0, 54, 0 }, + { 64761, 0, 0 }, + { 0, 22, 0 }, + { 0, 48, 0 }, + { 0, 15, 0 }, + { 8, 0, 0 }, + { 65506, 0, 0 }, + { 65511, 0, 0 }, + { 65521, 0, 0 }, + { 65514, 0, 0 }, + { 65482, 0, 0 }, + { 65488, 0, 0 }, + { 65472, 0, 0 }, + { 65529, 0, 0 }, + { 80, 0, 0 }, + { 32, 6254, 0 }, + { 32, 6253, 0 }, + { 32, 6244, 0 }, + { 32, 6242, 0 }, + { 32, 6242, 6243 }, + { 32, 6236, 0 }, + { 0, 6222, 0 }, + { 0, 6221, 0 }, + { 0, 6212, 0 }, + { 0, 6210, 0 }, + { 0, 6210, 6211 }, + { 0, 6204, 0 }, + { 1, 6181, 0 }, + { 0, 6180, 0 }, + { 15, 0, 0 }, + { 48, 0, 0 }, + { 7264, 0, 0 }, + { 38864, 0, 0 }, + { 59314, 0, 0 }, + { 59315, 0, 0 }, + { 59324, 0, 0 }, + { 59326, 0, 0 }, + { 59326, 0, 1 }, + { 59325, 65535, 0 }, + { 59332, 0, 0 }, + { 59356, 0, 0 }, + { 0, 35267, 0 }, + { 62528, 0, 0 }, + { 1, 59, 0 }, + { 0, 58, 0 }, + { 65478, 0, 0 }, + { 65528, 0, 0 }, + { 65462, 0, 0 }, + { 65527, 0, 0 }, + { 58247, 58363, 0 }, + { 65450, 0, 0 }, + { 65436, 0, 0 }, + { 65424, 0, 0 }, + { 65408, 0, 0 }, + { 65410, 0, 0 }, + { 28, 0, 0 }, + { 16, 0, 0 }, + { 26, 0, 0 }, + { 54793, 0, 0 }, + { 61722, 0, 0 }, + { 54809, 0, 0 }, + { 54756, 0, 0 }, + { 54787, 0, 0 }, + { 54753, 0, 0 }, + { 54754, 0, 0 }, + { 54721, 0, 0 }, + { 30270, 1, 0 }, + { 30269, 0, 0 }, + { 30204, 0, 0 }, + { 23256, 0, 0 }, + { 23228, 0, 0 }, + { 23217, 0, 0 }, + { 23221, 0, 0 }, + { 23231, 0, 0 }, + { 23278, 0, 0 }, + { 23254, 0, 0 }, + { 23275, 0, 0 }, + { 928, 0, 0 }, }; const uint8_t unicode::codepoints_with_same_upper_index1[] = { @@ -1289,220 +1289,220 @@ const uint8_t 
unicode::codepoints_with_same_upper_index2[] = { }; const FoldingInfo unicode::js_foldinfo[] = { - {0, 0, 0, 0}, - {32, 0, 0, 0}, - {32, 8415, 0, 0}, - {32, 300, 0, 0}, - {0, 65504, 0, 0}, - {0, 65504, 8383, 0}, - {0, 65504, 268, 0}, - {775, 743, 0, 0}, - {32, 8294, 0, 0}, - {0, 7615, 0, 0}, - {0, 65504, 8262, 0}, - {0, 121, 0, 0}, - {1, 0, 0, 0}, - {0, 65535, 0, 0}, - {65415, 0, 0, 0}, - {65268, 65236, 0, 0}, - {0, 195, 0, 0}, - {210, 0, 0, 0}, - {206, 0, 0, 0}, - {205, 0, 0, 0}, - {79, 0, 0, 0}, - {202, 0, 0, 0}, - {203, 0, 0, 0}, - {207, 0, 0, 0}, - {0, 97, 0, 0}, - {211, 0, 0, 0}, - {209, 0, 0, 0}, - {0, 163, 0, 0}, - {213, 0, 0, 0}, - {0, 130, 0, 0}, - {214, 0, 0, 0}, - {218, 0, 0, 0}, - {217, 0, 0, 0}, - {219, 0, 0, 0}, - {0, 56, 0, 0}, - {2, 1, 0, 0}, - {1, 65535, 0, 0}, - {0, 65534, 65535, 0}, - {0, 65457, 0, 0}, - {65439, 0, 0, 0}, - {65480, 0, 0, 0}, - {65406, 0, 0, 0}, - {10795, 0, 0, 0}, - {65373, 0, 0, 0}, - {10792, 0, 0, 0}, - {0, 10815, 0, 0}, - {65341, 0, 0, 0}, - {69, 0, 0, 0}, - {71, 0, 0, 0}, - {0, 10783, 0, 0}, - {0, 10780, 0, 0}, - {0, 10782, 0, 0}, - {0, 65326, 0, 0}, - {0, 65330, 0, 0}, - {0, 65331, 0, 0}, - {0, 65334, 0, 0}, - {0, 65333, 0, 0}, - {0, 42319, 0, 0}, - {0, 42315, 0, 0}, - {0, 65329, 0, 0}, - {0, 42280, 0, 0}, - {0, 42308, 0, 0}, - {0, 65327, 0, 0}, - {0, 65325, 0, 0}, - {0, 10743, 0, 0}, - {0, 42305, 0, 0}, - {0, 10749, 0, 0}, - {0, 65323, 0, 0}, - {0, 65322, 0, 0}, - {0, 10727, 0, 0}, - {0, 65318, 0, 0}, - {0, 42282, 0, 0}, - {0, 65467, 0, 0}, - {0, 65319, 0, 0}, - {0, 65465, 0, 0}, - {0, 65317, 0, 0}, - {0, 42261, 0, 0}, - {0, 42258, 0, 0}, - {116, 84, 7289, 0}, - {116, 0, 0, 0}, - {38, 0, 0, 0}, - {37, 0, 0, 0}, - {64, 0, 0, 0}, - {63, 0, 0, 0}, - {32, 62, 0, 0}, - {32, 96, 0, 0}, - {32, 57, 92, 0}, - {32, 65452, 7205, 0}, - {32, 86, 0, 0}, - {32, 64793, 0, 0}, - {32, 54, 0, 0}, - {32, 80, 0, 0}, - {32, 31, 0, 0}, - {32, 47, 0, 0}, - {32, 7549, 0, 0}, - {0, 65498, 0, 0}, - {0, 65499, 0, 0}, - {0, 65504, 30, 0}, - {0, 65504, 
64, 0}, - {0, 65504, 25, 60}, - {0, 65420, 65504, 7173}, - {0, 65504, 54, 0}, - {0, 64761, 65504, 0}, - {0, 65504, 22, 0}, - {0, 65504, 48, 0}, - {1, 65505, 0, 0}, - {0, 65504, 65535, 0}, - {0, 65504, 15, 0}, - {0, 65504, 7517, 0}, - {0, 65472, 0, 0}, - {0, 65473, 0, 0}, - {8, 0, 0, 0}, - {65506, 65474, 0, 0}, - {65511, 65479, 35, 0}, - {65521, 65489, 0, 0}, - {65514, 65482, 0, 0}, - {0, 65528, 0, 0}, - {65482, 65450, 0, 0}, - {65488, 65456, 0, 0}, - {0, 7, 0, 0}, - {0, 65420, 0, 0}, - {65476, 65444, 65501, 0}, - {65472, 65440, 0, 0}, - {65529, 0, 0, 0}, - {80, 0, 0, 0}, - {32, 6254, 0, 0}, - {32, 6253, 0, 0}, - {32, 6244, 0, 0}, - {32, 6242, 0, 0}, - {32, 6242, 6243, 0}, - {32, 6236, 0, 0}, - {0, 65504, 6222, 0}, - {0, 65504, 6221, 0}, - {0, 65504, 6212, 0}, - {0, 65504, 6210, 0}, - {0, 65504, 6210, 6211}, - {0, 65504, 6204, 0}, - {0, 65456, 0, 0}, - {1, 6181, 0, 0}, - {0, 65535, 6180, 0}, - {15, 0, 0, 0}, - {0, 65521, 0, 0}, - {48, 0, 0, 0}, - {0, 65488, 0, 0}, - {7264, 0, 0, 0}, - {0, 3008, 0, 0}, - {0, 38864, 0, 0}, - {0, 8, 0, 0}, - {65528, 0, 0, 0}, - {59314, 59282, 0, 0}, - {59315, 59283, 0, 0}, - {59324, 59292, 0, 0}, - {59326, 59294, 0, 0}, - {59326, 59294, 1, 0}, - {59325, 59293, 65535, 0}, - {59332, 59300, 0, 0}, - {59356, 59355, 0, 0}, - {35267, 35266, 0, 0}, - {62528, 0, 0, 0}, - {0, 35332, 0, 0}, - {0, 3814, 0, 0}, - {1, 59, 0, 0}, - {0, 65535, 58, 0}, - {65478, 65477, 0, 0}, - {57921, 0, 0, 0}, - {0, 74, 0, 0}, - {0, 86, 0, 0}, - {0, 100, 0, 0}, - {0, 128, 0, 0}, - {0, 112, 0, 0}, - {0, 126, 0, 0}, - {0, 9, 0, 0}, - {65462, 0, 0, 0}, - {65527, 0, 0, 0}, - {58363, 58247, 58331, 0}, - {65450, 0, 0, 0}, - {65436, 0, 0, 0}, - {65424, 0, 0, 0}, - {65408, 0, 0, 0}, - {65410, 0, 0, 0}, - {58019, 57987, 0, 0}, - {57153, 57121, 0, 0}, - {57274, 57242, 0, 0}, - {28, 0, 0, 0}, - {0, 65508, 0, 0}, - {16, 0, 0, 0}, - {0, 65520, 0, 0}, - {26, 0, 0, 0}, - {0, 65510, 0, 0}, - {54793, 0, 0, 0}, - {61722, 0, 0, 0}, - {54809, 0, 0, 0}, - {0, 54741, 0, 0}, - {0, 54744, 
0, 0}, - {54756, 0, 0, 0}, - {54787, 0, 0, 0}, - {54753, 0, 0, 0}, - {54754, 0, 0, 0}, - {54721, 0, 0, 0}, - {0, 58272, 0, 0}, - {1, 30270, 0, 0}, - {0, 30269, 65535, 0}, - {30204, 0, 0, 0}, - {23256, 0, 0, 0}, - {23228, 0, 0, 0}, - {23217, 0, 0, 0}, - {23221, 0, 0, 0}, - {23231, 0, 0, 0}, - {23278, 0, 0, 0}, - {23254, 0, 0, 0}, - {23275, 0, 0, 0}, - {928, 0, 0, 0}, - {0, 64608, 0, 0}, - {26672, 0, 0, 0}, + { 0, 0, 0, 0 }, + { 32, 0, 0, 0 }, + { 32, 8415, 0, 0 }, + { 32, 300, 0, 0 }, + { 0, 65504, 0, 0 }, + { 0, 65504, 8383, 0 }, + { 0, 65504, 268, 0 }, + { 775, 743, 0, 0 }, + { 32, 8294, 0, 0 }, + { 0, 7615, 0, 0 }, + { 0, 65504, 8262, 0 }, + { 0, 121, 0, 0 }, + { 1, 0, 0, 0 }, + { 0, 65535, 0, 0 }, + { 65415, 0, 0, 0 }, + { 65268, 65236, 0, 0 }, + { 0, 195, 0, 0 }, + { 210, 0, 0, 0 }, + { 206, 0, 0, 0 }, + { 205, 0, 0, 0 }, + { 79, 0, 0, 0 }, + { 202, 0, 0, 0 }, + { 203, 0, 0, 0 }, + { 207, 0, 0, 0 }, + { 0, 97, 0, 0 }, + { 211, 0, 0, 0 }, + { 209, 0, 0, 0 }, + { 0, 163, 0, 0 }, + { 213, 0, 0, 0 }, + { 0, 130, 0, 0 }, + { 214, 0, 0, 0 }, + { 218, 0, 0, 0 }, + { 217, 0, 0, 0 }, + { 219, 0, 0, 0 }, + { 0, 56, 0, 0 }, + { 2, 1, 0, 0 }, + { 1, 65535, 0, 0 }, + { 0, 65534, 65535, 0 }, + { 0, 65457, 0, 0 }, + { 65439, 0, 0, 0 }, + { 65480, 0, 0, 0 }, + { 65406, 0, 0, 0 }, + { 10795, 0, 0, 0 }, + { 65373, 0, 0, 0 }, + { 10792, 0, 0, 0 }, + { 0, 10815, 0, 0 }, + { 65341, 0, 0, 0 }, + { 69, 0, 0, 0 }, + { 71, 0, 0, 0 }, + { 0, 10783, 0, 0 }, + { 0, 10780, 0, 0 }, + { 0, 10782, 0, 0 }, + { 0, 65326, 0, 0 }, + { 0, 65330, 0, 0 }, + { 0, 65331, 0, 0 }, + { 0, 65334, 0, 0 }, + { 0, 65333, 0, 0 }, + { 0, 42319, 0, 0 }, + { 0, 42315, 0, 0 }, + { 0, 65329, 0, 0 }, + { 0, 42280, 0, 0 }, + { 0, 42308, 0, 0 }, + { 0, 65327, 0, 0 }, + { 0, 65325, 0, 0 }, + { 0, 10743, 0, 0 }, + { 0, 42305, 0, 0 }, + { 0, 10749, 0, 0 }, + { 0, 65323, 0, 0 }, + { 0, 65322, 0, 0 }, + { 0, 10727, 0, 0 }, + { 0, 65318, 0, 0 }, + { 0, 42282, 0, 0 }, + { 0, 65467, 0, 0 }, + { 0, 65319, 0, 0 }, + { 0, 
65465, 0, 0 }, + { 0, 65317, 0, 0 }, + { 0, 42261, 0, 0 }, + { 0, 42258, 0, 0 }, + { 116, 84, 7289, 0 }, + { 116, 0, 0, 0 }, + { 38, 0, 0, 0 }, + { 37, 0, 0, 0 }, + { 64, 0, 0, 0 }, + { 63, 0, 0, 0 }, + { 32, 62, 0, 0 }, + { 32, 96, 0, 0 }, + { 32, 57, 92, 0 }, + { 32, 65452, 7205, 0 }, + { 32, 86, 0, 0 }, + { 32, 64793, 0, 0 }, + { 32, 54, 0, 0 }, + { 32, 80, 0, 0 }, + { 32, 31, 0, 0 }, + { 32, 47, 0, 0 }, + { 32, 7549, 0, 0 }, + { 0, 65498, 0, 0 }, + { 0, 65499, 0, 0 }, + { 0, 65504, 30, 0 }, + { 0, 65504, 64, 0 }, + { 0, 65504, 25, 60 }, + { 0, 65420, 65504, 7173 }, + { 0, 65504, 54, 0 }, + { 0, 64761, 65504, 0 }, + { 0, 65504, 22, 0 }, + { 0, 65504, 48, 0 }, + { 1, 65505, 0, 0 }, + { 0, 65504, 65535, 0 }, + { 0, 65504, 15, 0 }, + { 0, 65504, 7517, 0 }, + { 0, 65472, 0, 0 }, + { 0, 65473, 0, 0 }, + { 8, 0, 0, 0 }, + { 65506, 65474, 0, 0 }, + { 65511, 65479, 35, 0 }, + { 65521, 65489, 0, 0 }, + { 65514, 65482, 0, 0 }, + { 0, 65528, 0, 0 }, + { 65482, 65450, 0, 0 }, + { 65488, 65456, 0, 0 }, + { 0, 7, 0, 0 }, + { 0, 65420, 0, 0 }, + { 65476, 65444, 65501, 0 }, + { 65472, 65440, 0, 0 }, + { 65529, 0, 0, 0 }, + { 80, 0, 0, 0 }, + { 32, 6254, 0, 0 }, + { 32, 6253, 0, 0 }, + { 32, 6244, 0, 0 }, + { 32, 6242, 0, 0 }, + { 32, 6242, 6243, 0 }, + { 32, 6236, 0, 0 }, + { 0, 65504, 6222, 0 }, + { 0, 65504, 6221, 0 }, + { 0, 65504, 6212, 0 }, + { 0, 65504, 6210, 0 }, + { 0, 65504, 6210, 6211 }, + { 0, 65504, 6204, 0 }, + { 0, 65456, 0, 0 }, + { 1, 6181, 0, 0 }, + { 0, 65535, 6180, 0 }, + { 15, 0, 0, 0 }, + { 0, 65521, 0, 0 }, + { 48, 0, 0, 0 }, + { 0, 65488, 0, 0 }, + { 7264, 0, 0, 0 }, + { 0, 3008, 0, 0 }, + { 0, 38864, 0, 0 }, + { 0, 8, 0, 0 }, + { 65528, 0, 0, 0 }, + { 59314, 59282, 0, 0 }, + { 59315, 59283, 0, 0 }, + { 59324, 59292, 0, 0 }, + { 59326, 59294, 0, 0 }, + { 59326, 59294, 1, 0 }, + { 59325, 59293, 65535, 0 }, + { 59332, 59300, 0, 0 }, + { 59356, 59355, 0, 0 }, + { 35267, 35266, 0, 0 }, + { 62528, 0, 0, 0 }, + { 0, 35332, 0, 0 }, + { 0, 3814, 0, 0 }, + { 1, 
59, 0, 0 }, + { 0, 65535, 58, 0 }, + { 65478, 65477, 0, 0 }, + { 57921, 0, 0, 0 }, + { 0, 74, 0, 0 }, + { 0, 86, 0, 0 }, + { 0, 100, 0, 0 }, + { 0, 128, 0, 0 }, + { 0, 112, 0, 0 }, + { 0, 126, 0, 0 }, + { 0, 9, 0, 0 }, + { 65462, 0, 0, 0 }, + { 65527, 0, 0, 0 }, + { 58363, 58247, 58331, 0 }, + { 65450, 0, 0, 0 }, + { 65436, 0, 0, 0 }, + { 65424, 0, 0, 0 }, + { 65408, 0, 0, 0 }, + { 65410, 0, 0, 0 }, + { 58019, 57987, 0, 0 }, + { 57153, 57121, 0, 0 }, + { 57274, 57242, 0, 0 }, + { 28, 0, 0, 0 }, + { 0, 65508, 0, 0 }, + { 16, 0, 0, 0 }, + { 0, 65520, 0, 0 }, + { 26, 0, 0, 0 }, + { 0, 65510, 0, 0 }, + { 54793, 0, 0, 0 }, + { 61722, 0, 0, 0 }, + { 54809, 0, 0, 0 }, + { 0, 54741, 0, 0 }, + { 0, 54744, 0, 0 }, + { 54756, 0, 0, 0 }, + { 54787, 0, 0, 0 }, + { 54753, 0, 0, 0 }, + { 54754, 0, 0, 0 }, + { 54721, 0, 0, 0 }, + { 0, 58272, 0, 0 }, + { 1, 30270, 0, 0 }, + { 0, 30269, 65535, 0 }, + { 30204, 0, 0, 0 }, + { 23256, 0, 0, 0 }, + { 23228, 0, 0, 0 }, + { 23217, 0, 0, 0 }, + { 23221, 0, 0, 0 }, + { 23231, 0, 0, 0 }, + { 23278, 0, 0, 0 }, + { 23254, 0, 0, 0 }, + { 23275, 0, 0, 0 }, + { 928, 0, 0, 0 }, + { 0, 64608, 0, 0 }, + { 26672, 0, 0, 0 }, }; const uint8_t unicode::folding_index1[] = { @@ -1756,439 +1756,439 @@ const uint8_t unicode::folding_index2[] = { bool js::unicode::IsIdentifierStartNonBMP(uint32_t codePoint) { - if (codePoint >= 0x10000 && codePoint <= 0x1000b) + if (codePoint >= 0x10000 && codePoint <= 0x1000B) // LINEAR B SYLLABLE B008 A .. LINEAR B SYLLABLE B046 JE return true; - if (codePoint >= 0x1000d && codePoint <= 0x10026) + if (codePoint >= 0x1000D && codePoint <= 0x10026) // LINEAR B SYLLABLE B036 JO .. LINEAR B SYLLABLE B032 QO return true; - if (codePoint >= 0x10028 && codePoint <= 0x1003a) + if (codePoint >= 0x10028 && codePoint <= 0x1003A) // LINEAR B SYLLABLE B060 RA .. LINEAR B SYLLABLE B042 WO return true; - if (codePoint >= 0x1003c && codePoint <= 0x1003d) + if (codePoint >= 0x1003C && codePoint <= 0x1003D) // LINEAR B SYLLABLE B017 ZA .. 
LINEAR B SYLLABLE B074 ZE return true; - if (codePoint >= 0x1003f && codePoint <= 0x1004d) + if (codePoint >= 0x1003F && codePoint <= 0x1004D) // LINEAR B SYLLABLE B020 ZO .. LINEAR B SYLLABLE B091 TWO return true; - if (codePoint >= 0x10050 && codePoint <= 0x1005d) + if (codePoint >= 0x10050 && codePoint <= 0x1005D) // LINEAR B SYMBOL B018 .. LINEAR B SYMBOL B089 return true; - if (codePoint >= 0x10080 && codePoint <= 0x100fa) + if (codePoint >= 0x10080 && codePoint <= 0x100FA) // LINEAR B IDEOGRAM B100 MAN .. LINEAR B IDEOGRAM VESSEL B305 return true; - if (codePoint >= 0x10140 && codePoint <= 0x10174) + if (codePoint >= 0x10140 && codePoint <= 0x10174) // GREEK ACROPHONIC ATTIC ONE QUARTER .. GREEK ACROPHONIC STRATIAN FIFTY MNAS return true; - if (codePoint >= 0x10280 && codePoint <= 0x1029c) + if (codePoint >= 0x10280 && codePoint <= 0x1029C) // LYCIAN LETTER A .. LYCIAN LETTER X return true; - if (codePoint >= 0x102a0 && codePoint <= 0x102d0) + if (codePoint >= 0x102A0 && codePoint <= 0x102D0) // CARIAN LETTER A .. CARIAN LETTER UUU3 return true; - if (codePoint >= 0x10300 && codePoint <= 0x1031f) + if (codePoint >= 0x10300 && codePoint <= 0x1031F) // OLD ITALIC LETTER A .. OLD ITALIC LETTER ESS return true; - if (codePoint >= 0x1032d && codePoint <= 0x1034a) + if (codePoint >= 0x1032D && codePoint <= 0x1034A) // OLD ITALIC LETTER YE .. GOTHIC LETTER NINE HUNDRED return true; - if (codePoint >= 0x10350 && codePoint <= 0x10375) + if (codePoint >= 0x10350 && codePoint <= 0x10375) // OLD PERMIC LETTER AN .. OLD PERMIC LETTER IA return true; - if (codePoint >= 0x10380 && codePoint <= 0x1039d) + if (codePoint >= 0x10380 && codePoint <= 0x1039D) // UGARITIC LETTER ALPA .. UGARITIC LETTER SSU return true; - if (codePoint >= 0x103a0 && codePoint <= 0x103c3) + if (codePoint >= 0x103A0 && codePoint <= 0x103C3) // OLD PERSIAN SIGN A .. 
OLD PERSIAN SIGN HA return true; - if (codePoint >= 0x103c8 && codePoint <= 0x103cf) + if (codePoint >= 0x103C8 && codePoint <= 0x103CF) // OLD PERSIAN SIGN AURAMAZDAA .. OLD PERSIAN SIGN BUUMISH return true; - if (codePoint >= 0x103d1 && codePoint <= 0x103d5) + if (codePoint >= 0x103D1 && codePoint <= 0x103D5) // OLD PERSIAN NUMBER ONE .. OLD PERSIAN NUMBER HUNDRED return true; - if (codePoint >= 0x10400 && codePoint <= 0x1049d) + if (codePoint >= 0x10400 && codePoint <= 0x1049D) // DESERET CAPITAL LETTER LONG I .. OSMANYA LETTER OO return true; - if (codePoint >= 0x104b0 && codePoint <= 0x104d3) + if (codePoint >= 0x104B0 && codePoint <= 0x104D3) // OSAGE CAPITAL LETTER A .. OSAGE CAPITAL LETTER ZHA return true; - if (codePoint >= 0x104d8 && codePoint <= 0x104fb) + if (codePoint >= 0x104D8 && codePoint <= 0x104FB) // OSAGE SMALL LETTER A .. OSAGE SMALL LETTER ZHA return true; - if (codePoint >= 0x10500 && codePoint <= 0x10527) + if (codePoint >= 0x10500 && codePoint <= 0x10527) // ELBASAN LETTER A .. ELBASAN LETTER KHE return true; - if (codePoint >= 0x10530 && codePoint <= 0x10563) + if (codePoint >= 0x10530 && codePoint <= 0x10563) // CAUCASIAN ALBANIAN LETTER ALT .. CAUCASIAN ALBANIAN LETTER KIW return true; - if (codePoint >= 0x10600 && codePoint <= 0x10736) + if (codePoint >= 0x10600 && codePoint <= 0x10736) // LINEAR A SIGN AB001 .. LINEAR A SIGN A664 return true; - if (codePoint >= 0x10740 && codePoint <= 0x10755) + if (codePoint >= 0x10740 && codePoint <= 0x10755) // LINEAR A SIGN A701 A .. LINEAR A SIGN A732 JE return true; - if (codePoint >= 0x10760 && codePoint <= 0x10767) + if (codePoint >= 0x10760 && codePoint <= 0x10767) // LINEAR A SIGN A800 .. LINEAR A SIGN A807 return true; - if (codePoint >= 0x10800 && codePoint <= 0x10805) + if (codePoint >= 0x10800 && codePoint <= 0x10805) // CYPRIOT SYLLABLE A .. 
CYPRIOT SYLLABLE JA return true; - if (codePoint >= 0x10808 && codePoint <= 0x10808) + if (codePoint >= 0x10808 && codePoint <= 0x10808) // CYPRIOT SYLLABLE JO .. CYPRIOT SYLLABLE JO return true; - if (codePoint >= 0x1080a && codePoint <= 0x10835) + if (codePoint >= 0x1080A && codePoint <= 0x10835) // CYPRIOT SYLLABLE KA .. CYPRIOT SYLLABLE WO return true; - if (codePoint >= 0x10837 && codePoint <= 0x10838) + if (codePoint >= 0x10837 && codePoint <= 0x10838) // CYPRIOT SYLLABLE XA .. CYPRIOT SYLLABLE XE return true; - if (codePoint >= 0x1083c && codePoint <= 0x1083c) + if (codePoint >= 0x1083C && codePoint <= 0x1083C) // CYPRIOT SYLLABLE ZA .. CYPRIOT SYLLABLE ZA return true; - if (codePoint >= 0x1083f && codePoint <= 0x10855) + if (codePoint >= 0x1083F && codePoint <= 0x10855) // CYPRIOT SYLLABLE ZO .. IMPERIAL ARAMAIC LETTER TAW return true; - if (codePoint >= 0x10860 && codePoint <= 0x10876) + if (codePoint >= 0x10860 && codePoint <= 0x10876) // PALMYRENE LETTER ALEPH .. PALMYRENE LETTER TAW return true; - if (codePoint >= 0x10880 && codePoint <= 0x1089e) + if (codePoint >= 0x10880 && codePoint <= 0x1089E) // NABATAEAN LETTER FINAL ALEPH .. NABATAEAN LETTER TAW return true; - if (codePoint >= 0x108e0 && codePoint <= 0x108f2) + if (codePoint >= 0x108E0 && codePoint <= 0x108F2) // HATRAN LETTER ALEPH .. HATRAN LETTER QOPH return true; - if (codePoint >= 0x108f4 && codePoint <= 0x108f5) + if (codePoint >= 0x108F4 && codePoint <= 0x108F5) // HATRAN LETTER SHIN .. HATRAN LETTER TAW return true; - if (codePoint >= 0x10900 && codePoint <= 0x10915) + if (codePoint >= 0x10900 && codePoint <= 0x10915) // PHOENICIAN LETTER ALF .. PHOENICIAN LETTER TAU return true; - if (codePoint >= 0x10920 && codePoint <= 0x10939) + if (codePoint >= 0x10920 && codePoint <= 0x10939) // LYDIAN LETTER A .. LYDIAN LETTER C return true; - if (codePoint >= 0x10980 && codePoint <= 0x109b7) + if (codePoint >= 0x10980 && codePoint <= 0x109B7) // MEROITIC HIEROGLYPHIC LETTER A .. 
MEROITIC CURSIVE LETTER DA return true; - if (codePoint >= 0x109be && codePoint <= 0x109bf) + if (codePoint >= 0x109BE && codePoint <= 0x109BF) // MEROITIC CURSIVE LOGOGRAM RMT .. MEROITIC CURSIVE LOGOGRAM IMN return true; - if (codePoint >= 0x10a00 && codePoint <= 0x10a00) + if (codePoint >= 0x10A00 && codePoint <= 0x10A00) // KHAROSHTHI LETTER A .. KHAROSHTHI LETTER A return true; - if (codePoint >= 0x10a10 && codePoint <= 0x10a13) + if (codePoint >= 0x10A10 && codePoint <= 0x10A13) // KHAROSHTHI LETTER KA .. KHAROSHTHI LETTER GHA return true; - if (codePoint >= 0x10a15 && codePoint <= 0x10a17) + if (codePoint >= 0x10A15 && codePoint <= 0x10A17) // KHAROSHTHI LETTER CA .. KHAROSHTHI LETTER JA return true; - if (codePoint >= 0x10a19 && codePoint <= 0x10a35) + if (codePoint >= 0x10A19 && codePoint <= 0x10A35) // KHAROSHTHI LETTER NYA .. KHAROSHTHI LETTER VHA return true; - if (codePoint >= 0x10a60 && codePoint <= 0x10a7c) + if (codePoint >= 0x10A60 && codePoint <= 0x10A7C) // OLD SOUTH ARABIAN LETTER HE .. OLD SOUTH ARABIAN LETTER THETH return true; - if (codePoint >= 0x10a80 && codePoint <= 0x10a9c) + if (codePoint >= 0x10A80 && codePoint <= 0x10A9C) // OLD NORTH ARABIAN LETTER HEH .. OLD NORTH ARABIAN LETTER ZAH return true; - if (codePoint >= 0x10ac0 && codePoint <= 0x10ac7) + if (codePoint >= 0x10AC0 && codePoint <= 0x10AC7) // MANICHAEAN LETTER ALEPH .. MANICHAEAN LETTER WAW return true; - if (codePoint >= 0x10ac9 && codePoint <= 0x10ae4) + if (codePoint >= 0x10AC9 && codePoint <= 0x10AE4) // MANICHAEAN LETTER ZAYIN .. MANICHAEAN LETTER TAW return true; - if (codePoint >= 0x10b00 && codePoint <= 0x10b35) + if (codePoint >= 0x10B00 && codePoint <= 0x10B35) // AVESTAN LETTER A .. AVESTAN LETTER HE return true; - if (codePoint >= 0x10b40 && codePoint <= 0x10b55) + if (codePoint >= 0x10B40 && codePoint <= 0x10B55) // INSCRIPTIONAL PARTHIAN LETTER ALEPH .. 
INSCRIPTIONAL PARTHIAN LETTER TAW return true; - if (codePoint >= 0x10b60 && codePoint <= 0x10b72) + if (codePoint >= 0x10B60 && codePoint <= 0x10B72) // INSCRIPTIONAL PAHLAVI LETTER ALEPH .. INSCRIPTIONAL PAHLAVI LETTER TAW return true; - if (codePoint >= 0x10b80 && codePoint <= 0x10b91) + if (codePoint >= 0x10B80 && codePoint <= 0x10B91) // PSALTER PAHLAVI LETTER ALEPH .. PSALTER PAHLAVI LETTER TAW return true; - if (codePoint >= 0x10c00 && codePoint <= 0x10c48) + if (codePoint >= 0x10C00 && codePoint <= 0x10C48) // OLD TURKIC LETTER ORKHON A .. OLD TURKIC LETTER ORKHON BASH return true; - if (codePoint >= 0x10c80 && codePoint <= 0x10cb2) + if (codePoint >= 0x10C80 && codePoint <= 0x10CB2) // OLD HUNGARIAN CAPITAL LETTER A .. OLD HUNGARIAN CAPITAL LETTER US return true; - if (codePoint >= 0x10cc0 && codePoint <= 0x10cf2) + if (codePoint >= 0x10CC0 && codePoint <= 0x10CF2) // OLD HUNGARIAN SMALL LETTER A .. OLD HUNGARIAN SMALL LETTER US return true; - if (codePoint >= 0x10d00 && codePoint <= 0x10d23) + if (codePoint >= 0x10D00 && codePoint <= 0x10D23) // HANIFI ROHINGYA LETTER A .. HANIFI ROHINGYA MARK NA KHONNA return true; - if (codePoint >= 0x10f00 && codePoint <= 0x10f1c) + if (codePoint >= 0x10F00 && codePoint <= 0x10F1C) // OLD SOGDIAN LETTER ALEPH .. OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL return true; - if (codePoint >= 0x10f27 && codePoint <= 0x10f27) + if (codePoint >= 0x10F27 && codePoint <= 0x10F27) // OLD SOGDIAN LIGATURE AYIN-DALETH .. OLD SOGDIAN LIGATURE AYIN-DALETH return true; - if (codePoint >= 0x10f30 && codePoint <= 0x10f45) + if (codePoint >= 0x10F30 && codePoint <= 0x10F45) // SOGDIAN LETTER ALEPH .. SOGDIAN INDEPENDENT SHIN return true; - if (codePoint >= 0x11003 && codePoint <= 0x11037) + if (codePoint >= 0x11003 && codePoint <= 0x11037) // BRAHMI SIGN JIHVAMULIYA .. BRAHMI LETTER OLD TAMIL NNNA return true; - if (codePoint >= 0x11083 && codePoint <= 0x110af) + if (codePoint >= 0x11083 && codePoint <= 0x110AF) // KAITHI LETTER A .. 
KAITHI LETTER HA return true; - if (codePoint >= 0x110d0 && codePoint <= 0x110e8) + if (codePoint >= 0x110D0 && codePoint <= 0x110E8) // SORA SOMPENG LETTER SAH .. SORA SOMPENG LETTER MAE return true; - if (codePoint >= 0x11103 && codePoint <= 0x11126) + if (codePoint >= 0x11103 && codePoint <= 0x11126) // CHAKMA LETTER AA .. CHAKMA LETTER HAA return true; - if (codePoint >= 0x11144 && codePoint <= 0x11144) + if (codePoint >= 0x11144 && codePoint <= 0x11144) // CHAKMA LETTER LHAA .. CHAKMA LETTER LHAA return true; - if (codePoint >= 0x11150 && codePoint <= 0x11172) + if (codePoint >= 0x11150 && codePoint <= 0x11172) // MAHAJANI LETTER A .. MAHAJANI LETTER RRA return true; - if (codePoint >= 0x11176 && codePoint <= 0x11176) + if (codePoint >= 0x11176 && codePoint <= 0x11176) // MAHAJANI LIGATURE SHRI .. MAHAJANI LIGATURE SHRI return true; - if (codePoint >= 0x11183 && codePoint <= 0x111b2) + if (codePoint >= 0x11183 && codePoint <= 0x111B2) // SHARADA LETTER A .. SHARADA LETTER HA return true; - if (codePoint >= 0x111c1 && codePoint <= 0x111c4) + if (codePoint >= 0x111C1 && codePoint <= 0x111C4) // SHARADA SIGN AVAGRAHA .. SHARADA OM return true; - if (codePoint >= 0x111da && codePoint <= 0x111da) + if (codePoint >= 0x111DA && codePoint <= 0x111DA) // SHARADA EKAM .. SHARADA EKAM return true; - if (codePoint >= 0x111dc && codePoint <= 0x111dc) + if (codePoint >= 0x111DC && codePoint <= 0x111DC) // SHARADA HEADSTROKE .. SHARADA HEADSTROKE return true; - if (codePoint >= 0x11200 && codePoint <= 0x11211) + if (codePoint >= 0x11200 && codePoint <= 0x11211) // KHOJKI LETTER A .. KHOJKI LETTER JJA return true; - if (codePoint >= 0x11213 && codePoint <= 0x1122b) + if (codePoint >= 0x11213 && codePoint <= 0x1122B) // KHOJKI LETTER NYA .. KHOJKI LETTER LLA return true; - if (codePoint >= 0x11280 && codePoint <= 0x11286) + if (codePoint >= 0x11280 && codePoint <= 0x11286) // MULTANI LETTER A .. 
MULTANI LETTER GA return true; - if (codePoint >= 0x11288 && codePoint <= 0x11288) + if (codePoint >= 0x11288 && codePoint <= 0x11288) // MULTANI LETTER GHA .. MULTANI LETTER GHA return true; - if (codePoint >= 0x1128a && codePoint <= 0x1128d) + if (codePoint >= 0x1128A && codePoint <= 0x1128D) // MULTANI LETTER CA .. MULTANI LETTER JJA return true; - if (codePoint >= 0x1128f && codePoint <= 0x1129d) + if (codePoint >= 0x1128F && codePoint <= 0x1129D) // MULTANI LETTER NYA .. MULTANI LETTER BA return true; - if (codePoint >= 0x1129f && codePoint <= 0x112a8) + if (codePoint >= 0x1129F && codePoint <= 0x112A8) // MULTANI LETTER BHA .. MULTANI LETTER RHA return true; - if (codePoint >= 0x112b0 && codePoint <= 0x112de) + if (codePoint >= 0x112B0 && codePoint <= 0x112DE) // KHUDAWADI LETTER A .. KHUDAWADI LETTER HA return true; - if (codePoint >= 0x11305 && codePoint <= 0x1130c) + if (codePoint >= 0x11305 && codePoint <= 0x1130C) // GRANTHA LETTER A .. GRANTHA LETTER VOCALIC L return true; - if (codePoint >= 0x1130f && codePoint <= 0x11310) + if (codePoint >= 0x1130F && codePoint <= 0x11310) // GRANTHA LETTER EE .. GRANTHA LETTER AI return true; - if (codePoint >= 0x11313 && codePoint <= 0x11328) + if (codePoint >= 0x11313 && codePoint <= 0x11328) // GRANTHA LETTER OO .. GRANTHA LETTER NA return true; - if (codePoint >= 0x1132a && codePoint <= 0x11330) + if (codePoint >= 0x1132A && codePoint <= 0x11330) // GRANTHA LETTER PA .. GRANTHA LETTER RA return true; - if (codePoint >= 0x11332 && codePoint <= 0x11333) + if (codePoint >= 0x11332 && codePoint <= 0x11333) // GRANTHA LETTER LA .. GRANTHA LETTER LLA return true; - if (codePoint >= 0x11335 && codePoint <= 0x11339) + if (codePoint >= 0x11335 && codePoint <= 0x11339) // GRANTHA LETTER VA .. GRANTHA LETTER HA return true; - if (codePoint >= 0x1133d && codePoint <= 0x1133d) + if (codePoint >= 0x1133D && codePoint <= 0x1133D) // GRANTHA SIGN AVAGRAHA .. 
GRANTHA SIGN AVAGRAHA return true; - if (codePoint >= 0x11350 && codePoint <= 0x11350) + if (codePoint >= 0x11350 && codePoint <= 0x11350) // GRANTHA OM .. GRANTHA OM return true; - if (codePoint >= 0x1135d && codePoint <= 0x11361) + if (codePoint >= 0x1135D && codePoint <= 0x11361) // GRANTHA SIGN PLUTA .. GRANTHA LETTER VOCALIC LL return true; - if (codePoint >= 0x11400 && codePoint <= 0x11434) + if (codePoint >= 0x11400 && codePoint <= 0x11434) // NEWA LETTER A .. NEWA LETTER HA return true; - if (codePoint >= 0x11447 && codePoint <= 0x1144a) + if (codePoint >= 0x11447 && codePoint <= 0x1144A) // NEWA SIGN AVAGRAHA .. NEWA SIDDHI return true; - if (codePoint >= 0x11480 && codePoint <= 0x114af) + if (codePoint >= 0x11480 && codePoint <= 0x114AF) // TIRHUTA ANJI .. TIRHUTA LETTER HA return true; - if (codePoint >= 0x114c4 && codePoint <= 0x114c5) + if (codePoint >= 0x114C4 && codePoint <= 0x114C5) // TIRHUTA SIGN AVAGRAHA .. TIRHUTA GVANG return true; - if (codePoint >= 0x114c7 && codePoint <= 0x114c7) + if (codePoint >= 0x114C7 && codePoint <= 0x114C7) // TIRHUTA OM .. TIRHUTA OM return true; - if (codePoint >= 0x11580 && codePoint <= 0x115ae) + if (codePoint >= 0x11580 && codePoint <= 0x115AE) // SIDDHAM LETTER A .. SIDDHAM LETTER HA return true; - if (codePoint >= 0x115d8 && codePoint <= 0x115db) + if (codePoint >= 0x115D8 && codePoint <= 0x115DB) // SIDDHAM LETTER THREE-CIRCLE ALTERNATE I .. SIDDHAM LETTER ALTERNATE U return true; - if (codePoint >= 0x11600 && codePoint <= 0x1162f) + if (codePoint >= 0x11600 && codePoint <= 0x1162F) // MODI LETTER A .. MODI LETTER LLA return true; - if (codePoint >= 0x11644 && codePoint <= 0x11644) + if (codePoint >= 0x11644 && codePoint <= 0x11644) // MODI SIGN HUVA .. MODI SIGN HUVA return true; - if (codePoint >= 0x11680 && codePoint <= 0x116aa) + if (codePoint >= 0x11680 && codePoint <= 0x116AA) // TAKRI LETTER A .. 
TAKRI LETTER RRA return true; - if (codePoint >= 0x11700 && codePoint <= 0x1171a) + if (codePoint >= 0x11700 && codePoint <= 0x1171A) // AHOM LETTER KA .. AHOM LETTER ALTERNATE BA return true; - if (codePoint >= 0x11800 && codePoint <= 0x1182b) + if (codePoint >= 0x11800 && codePoint <= 0x1182B) // DOGRA LETTER A .. DOGRA LETTER RRA return true; - if (codePoint >= 0x118a0 && codePoint <= 0x118df) + if (codePoint >= 0x118A0 && codePoint <= 0x118DF) // WARANG CITI CAPITAL LETTER NGAA .. WARANG CITI SMALL LETTER VIYO return true; - if (codePoint >= 0x118ff && codePoint <= 0x118ff) + if (codePoint >= 0x118FF && codePoint <= 0x118FF) // WARANG CITI OM .. WARANG CITI OM return true; - if (codePoint >= 0x11a00 && codePoint <= 0x11a00) + if (codePoint >= 0x11A00 && codePoint <= 0x11A00) // ZANABAZAR SQUARE LETTER A .. ZANABAZAR SQUARE LETTER A return true; - if (codePoint >= 0x11a0b && codePoint <= 0x11a32) + if (codePoint >= 0x11A0B && codePoint <= 0x11A32) // ZANABAZAR SQUARE LETTER KA .. ZANABAZAR SQUARE LETTER KSSA return true; - if (codePoint >= 0x11a3a && codePoint <= 0x11a3a) + if (codePoint >= 0x11A3A && codePoint <= 0x11A3A) // ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA .. ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA return true; - if (codePoint >= 0x11a50 && codePoint <= 0x11a50) + if (codePoint >= 0x11A50 && codePoint <= 0x11A50) // SOYOMBO LETTER A .. SOYOMBO LETTER A return true; - if (codePoint >= 0x11a5c && codePoint <= 0x11a83) + if (codePoint >= 0x11A5C && codePoint <= 0x11A83) // SOYOMBO LETTER KA .. SOYOMBO LETTER KSSA return true; - if (codePoint >= 0x11a86 && codePoint <= 0x11a89) + if (codePoint >= 0x11A86 && codePoint <= 0x11A89) // SOYOMBO CLUSTER-INITIAL LETTER RA .. SOYOMBO CLUSTER-INITIAL LETTER SA return true; - if (codePoint >= 0x11a9d && codePoint <= 0x11a9d) + if (codePoint >= 0x11A9D && codePoint <= 0x11A9D) // SOYOMBO MARK PLUTA .. 
SOYOMBO MARK PLUTA return true; - if (codePoint >= 0x11ac0 && codePoint <= 0x11af8) + if (codePoint >= 0x11AC0 && codePoint <= 0x11AF8) // PAU CIN HAU LETTER PA .. PAU CIN HAU GLOTTAL STOP FINAL return true; - if (codePoint >= 0x11c00 && codePoint <= 0x11c08) + if (codePoint >= 0x11C00 && codePoint <= 0x11C08) // BHAIKSUKI LETTER A .. BHAIKSUKI LETTER VOCALIC L return true; - if (codePoint >= 0x11c0a && codePoint <= 0x11c2e) + if (codePoint >= 0x11C0A && codePoint <= 0x11C2E) // BHAIKSUKI LETTER E .. BHAIKSUKI LETTER HA return true; - if (codePoint >= 0x11c40 && codePoint <= 0x11c40) + if (codePoint >= 0x11C40 && codePoint <= 0x11C40) // BHAIKSUKI SIGN AVAGRAHA .. BHAIKSUKI SIGN AVAGRAHA return true; - if (codePoint >= 0x11c72 && codePoint <= 0x11c8f) + if (codePoint >= 0x11C72 && codePoint <= 0x11C8F) // MARCHEN LETTER KA .. MARCHEN LETTER A return true; - if (codePoint >= 0x11d00 && codePoint <= 0x11d06) + if (codePoint >= 0x11D00 && codePoint <= 0x11D06) // MASARAM GONDI LETTER A .. MASARAM GONDI LETTER E return true; - if (codePoint >= 0x11d08 && codePoint <= 0x11d09) + if (codePoint >= 0x11D08 && codePoint <= 0x11D09) // MASARAM GONDI LETTER AI .. MASARAM GONDI LETTER O return true; - if (codePoint >= 0x11d0b && codePoint <= 0x11d30) + if (codePoint >= 0x11D0B && codePoint <= 0x11D30) // MASARAM GONDI LETTER AU .. MASARAM GONDI LETTER TRA return true; - if (codePoint >= 0x11d46 && codePoint <= 0x11d46) + if (codePoint >= 0x11D46 && codePoint <= 0x11D46) // MASARAM GONDI REPHA .. MASARAM GONDI REPHA return true; - if (codePoint >= 0x11d60 && codePoint <= 0x11d65) + if (codePoint >= 0x11D60 && codePoint <= 0x11D65) // GUNJALA GONDI LETTER A .. GUNJALA GONDI LETTER UU return true; - if (codePoint >= 0x11d67 && codePoint <= 0x11d68) + if (codePoint >= 0x11D67 && codePoint <= 0x11D68) // GUNJALA GONDI LETTER EE .. 
GUNJALA GONDI LETTER AI return true; - if (codePoint >= 0x11d6a && codePoint <= 0x11d89) + if (codePoint >= 0x11D6A && codePoint <= 0x11D89) // GUNJALA GONDI LETTER OO .. GUNJALA GONDI LETTER SA return true; - if (codePoint >= 0x11d98 && codePoint <= 0x11d98) + if (codePoint >= 0x11D98 && codePoint <= 0x11D98) // GUNJALA GONDI OM .. GUNJALA GONDI OM return true; - if (codePoint >= 0x11ee0 && codePoint <= 0x11ef2) + if (codePoint >= 0x11EE0 && codePoint <= 0x11EF2) // MAKASAR LETTER KA .. MAKASAR ANGKA return true; - if (codePoint >= 0x12000 && codePoint <= 0x12399) + if (codePoint >= 0x12000 && codePoint <= 0x12399) // CUNEIFORM SIGN A .. CUNEIFORM SIGN U U return true; - if (codePoint >= 0x12400 && codePoint <= 0x1246e) + if (codePoint >= 0x12400 && codePoint <= 0x1246E) // CUNEIFORM NUMERIC SIGN TWO ASH .. CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM return true; - if (codePoint >= 0x12480 && codePoint <= 0x12543) + if (codePoint >= 0x12480 && codePoint <= 0x12543) // CUNEIFORM SIGN AB TIMES NUN TENU .. CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU return true; - if (codePoint >= 0x13000 && codePoint <= 0x1342e) + if (codePoint >= 0x13000 && codePoint <= 0x1342E) // EGYPTIAN HIEROGLYPH A001 .. EGYPTIAN HIEROGLYPH AA032 return true; - if (codePoint >= 0x14400 && codePoint <= 0x14646) + if (codePoint >= 0x14400 && codePoint <= 0x14646) // ANATOLIAN HIEROGLYPH A001 .. ANATOLIAN HIEROGLYPH A530 return true; - if (codePoint >= 0x16800 && codePoint <= 0x16a38) + if (codePoint >= 0x16800 && codePoint <= 0x16A38) // BAMUM LETTER PHASE-A NGKUE MFON .. BAMUM LETTER PHASE-F VUEQ return true; - if (codePoint >= 0x16a40 && codePoint <= 0x16a5e) + if (codePoint >= 0x16A40 && codePoint <= 0x16A5E) // MRO LETTER TA .. MRO LETTER TEK return true; - if (codePoint >= 0x16ad0 && codePoint <= 0x16aed) + if (codePoint >= 0x16AD0 && codePoint <= 0x16AED) // BASSA VAH LETTER ENNI .. 
BASSA VAH LETTER I return true; - if (codePoint >= 0x16b00 && codePoint <= 0x16b2f) + if (codePoint >= 0x16B00 && codePoint <= 0x16B2F) // PAHAWH HMONG VOWEL KEEB .. PAHAWH HMONG CONSONANT CAU return true; - if (codePoint >= 0x16b40 && codePoint <= 0x16b43) + if (codePoint >= 0x16B40 && codePoint <= 0x16B43) // PAHAWH HMONG SIGN VOS SEEV .. PAHAWH HMONG SIGN IB YAM return true; - if (codePoint >= 0x16b63 && codePoint <= 0x16b77) + if (codePoint >= 0x16B63 && codePoint <= 0x16B77) // PAHAWH HMONG SIGN VOS LUB .. PAHAWH HMONG SIGN CIM NRES TOS return true; - if (codePoint >= 0x16b7d && codePoint <= 0x16b8f) + if (codePoint >= 0x16B7D && codePoint <= 0x16B8F) // PAHAWH HMONG CLAN SIGN TSHEEJ .. PAHAWH HMONG CLAN SIGN VWJ return true; - if (codePoint >= 0x16e40 && codePoint <= 0x16e7f) + if (codePoint >= 0x16E40 && codePoint <= 0x16E7F) // MEDEFAIDRIN CAPITAL LETTER M .. MEDEFAIDRIN SMALL LETTER Y return true; - if (codePoint >= 0x16f00 && codePoint <= 0x16f44) + if (codePoint >= 0x16F00 && codePoint <= 0x16F44) // MIAO LETTER PA .. MIAO LETTER HHA return true; - if (codePoint >= 0x16f50 && codePoint <= 0x16f50) + if (codePoint >= 0x16F50 && codePoint <= 0x16F50) // MIAO LETTER NASALIZATION .. MIAO LETTER NASALIZATION return true; - if (codePoint >= 0x16f93 && codePoint <= 0x16f9f) + if (codePoint >= 0x16F93 && codePoint <= 0x16F9F) // MIAO LETTER TONE-2 .. MIAO LETTER REFORMED TONE-8 return true; - if (codePoint >= 0x16fe0 && codePoint <= 0x16fe1) + if (codePoint >= 0x16FE0 && codePoint <= 0x16FE1) // TANGUT ITERATION MARK .. NUSHU ITERATION MARK return true; - if (codePoint >= 0x17000 && codePoint <= 0x187f1) + if (codePoint >= 0x17000 && codePoint <= 0x187F1) // Tangut Ideograph .. Tangut Ideograph return true; - if (codePoint >= 0x18800 && codePoint <= 0x18af2) + if (codePoint >= 0x18800 && codePoint <= 0x18AF2) // TANGUT COMPONENT-001 .. 
TANGUT COMPONENT-755 return true; - if (codePoint >= 0x1b000 && codePoint <= 0x1b11e) + if (codePoint >= 0x1B000 && codePoint <= 0x1B11E) // KATAKANA LETTER ARCHAIC E .. HENTAIGANA LETTER N-MU-MO-2 return true; - if (codePoint >= 0x1b170 && codePoint <= 0x1b2fb) + if (codePoint >= 0x1B170 && codePoint <= 0x1B2FB) // NUSHU CHARACTER-1B170 .. NUSHU CHARACTER-1B2FB return true; - if (codePoint >= 0x1bc00 && codePoint <= 0x1bc6a) + if (codePoint >= 0x1BC00 && codePoint <= 0x1BC6A) // DUPLOYAN LETTER H .. DUPLOYAN LETTER VOCALIC M return true; - if (codePoint >= 0x1bc70 && codePoint <= 0x1bc7c) + if (codePoint >= 0x1BC70 && codePoint <= 0x1BC7C) // DUPLOYAN AFFIX LEFT HORIZONTAL SECANT .. DUPLOYAN AFFIX ATTACHED TANGENT HOOK return true; - if (codePoint >= 0x1bc80 && codePoint <= 0x1bc88) + if (codePoint >= 0x1BC80 && codePoint <= 0x1BC88) // DUPLOYAN AFFIX HIGH ACUTE .. DUPLOYAN AFFIX HIGH VERTICAL return true; - if (codePoint >= 0x1bc90 && codePoint <= 0x1bc99) + if (codePoint >= 0x1BC90 && codePoint <= 0x1BC99) // DUPLOYAN AFFIX LOW ACUTE .. DUPLOYAN AFFIX LOW ARROW return true; - if (codePoint >= 0x1d400 && codePoint <= 0x1d454) + if (codePoint >= 0x1D400 && codePoint <= 0x1D454) // MATHEMATICAL BOLD CAPITAL A .. MATHEMATICAL ITALIC SMALL G return true; - if (codePoint >= 0x1d456 && codePoint <= 0x1d49c) + if (codePoint >= 0x1D456 && codePoint <= 0x1D49C) // MATHEMATICAL ITALIC SMALL I .. MATHEMATICAL SCRIPT CAPITAL A return true; - if (codePoint >= 0x1d49e && codePoint <= 0x1d49f) + if (codePoint >= 0x1D49E && codePoint <= 0x1D49F) // MATHEMATICAL SCRIPT CAPITAL C .. MATHEMATICAL SCRIPT CAPITAL D return true; - if (codePoint >= 0x1d4a2 && codePoint <= 0x1d4a2) + if (codePoint >= 0x1D4A2 && codePoint <= 0x1D4A2) // MATHEMATICAL SCRIPT CAPITAL G .. MATHEMATICAL SCRIPT CAPITAL G return true; - if (codePoint >= 0x1d4a5 && codePoint <= 0x1d4a6) + if (codePoint >= 0x1D4A5 && codePoint <= 0x1D4A6) // MATHEMATICAL SCRIPT CAPITAL J .. 
MATHEMATICAL SCRIPT CAPITAL K return true; - if (codePoint >= 0x1d4a9 && codePoint <= 0x1d4ac) + if (codePoint >= 0x1D4A9 && codePoint <= 0x1D4AC) // MATHEMATICAL SCRIPT CAPITAL N .. MATHEMATICAL SCRIPT CAPITAL Q return true; - if (codePoint >= 0x1d4ae && codePoint <= 0x1d4b9) + if (codePoint >= 0x1D4AE && codePoint <= 0x1D4B9) // MATHEMATICAL SCRIPT CAPITAL S .. MATHEMATICAL SCRIPT SMALL D return true; - if (codePoint >= 0x1d4bb && codePoint <= 0x1d4bb) + if (codePoint >= 0x1D4BB && codePoint <= 0x1D4BB) // MATHEMATICAL SCRIPT SMALL F .. MATHEMATICAL SCRIPT SMALL F return true; - if (codePoint >= 0x1d4bd && codePoint <= 0x1d4c3) + if (codePoint >= 0x1D4BD && codePoint <= 0x1D4C3) // MATHEMATICAL SCRIPT SMALL H .. MATHEMATICAL SCRIPT SMALL N return true; - if (codePoint >= 0x1d4c5 && codePoint <= 0x1d505) + if (codePoint >= 0x1D4C5 && codePoint <= 0x1D505) // MATHEMATICAL SCRIPT SMALL P .. MATHEMATICAL FRAKTUR CAPITAL B return true; - if (codePoint >= 0x1d507 && codePoint <= 0x1d50a) + if (codePoint >= 0x1D507 && codePoint <= 0x1D50A) // MATHEMATICAL FRAKTUR CAPITAL D .. MATHEMATICAL FRAKTUR CAPITAL G return true; - if (codePoint >= 0x1d50d && codePoint <= 0x1d514) + if (codePoint >= 0x1D50D && codePoint <= 0x1D514) // MATHEMATICAL FRAKTUR CAPITAL J .. MATHEMATICAL FRAKTUR CAPITAL Q return true; - if (codePoint >= 0x1d516 && codePoint <= 0x1d51c) + if (codePoint >= 0x1D516 && codePoint <= 0x1D51C) // MATHEMATICAL FRAKTUR CAPITAL S .. MATHEMATICAL FRAKTUR CAPITAL Y return true; - if (codePoint >= 0x1d51e && codePoint <= 0x1d539) + if (codePoint >= 0x1D51E && codePoint <= 0x1D539) // MATHEMATICAL FRAKTUR SMALL A .. MATHEMATICAL DOUBLE-STRUCK CAPITAL B return true; - if (codePoint >= 0x1d53b && codePoint <= 0x1d53e) + if (codePoint >= 0x1D53B && codePoint <= 0x1D53E) // MATHEMATICAL DOUBLE-STRUCK CAPITAL D .. 
MATHEMATICAL DOUBLE-STRUCK CAPITAL G return true; - if (codePoint >= 0x1d540 && codePoint <= 0x1d544) + if (codePoint >= 0x1D540 && codePoint <= 0x1D544) // MATHEMATICAL DOUBLE-STRUCK CAPITAL I .. MATHEMATICAL DOUBLE-STRUCK CAPITAL M return true; - if (codePoint >= 0x1d546 && codePoint <= 0x1d546) + if (codePoint >= 0x1D546 && codePoint <= 0x1D546) // MATHEMATICAL DOUBLE-STRUCK CAPITAL O .. MATHEMATICAL DOUBLE-STRUCK CAPITAL O return true; - if (codePoint >= 0x1d54a && codePoint <= 0x1d550) + if (codePoint >= 0x1D54A && codePoint <= 0x1D550) // MATHEMATICAL DOUBLE-STRUCK CAPITAL S .. MATHEMATICAL DOUBLE-STRUCK CAPITAL Y return true; - if (codePoint >= 0x1d552 && codePoint <= 0x1d6a5) + if (codePoint >= 0x1D552 && codePoint <= 0x1D6A5) // MATHEMATICAL DOUBLE-STRUCK SMALL A .. MATHEMATICAL ITALIC SMALL DOTLESS J return true; - if (codePoint >= 0x1d6a8 && codePoint <= 0x1d6c0) + if (codePoint >= 0x1D6A8 && codePoint <= 0x1D6C0) // MATHEMATICAL BOLD CAPITAL ALPHA .. MATHEMATICAL BOLD CAPITAL OMEGA return true; - if (codePoint >= 0x1d6c2 && codePoint <= 0x1d6da) + if (codePoint >= 0x1D6C2 && codePoint <= 0x1D6DA) // MATHEMATICAL BOLD SMALL ALPHA .. MATHEMATICAL BOLD SMALL OMEGA return true; - if (codePoint >= 0x1d6dc && codePoint <= 0x1d6fa) + if (codePoint >= 0x1D6DC && codePoint <= 0x1D6FA) // MATHEMATICAL BOLD EPSILON SYMBOL .. MATHEMATICAL ITALIC CAPITAL OMEGA return true; - if (codePoint >= 0x1d6fc && codePoint <= 0x1d714) + if (codePoint >= 0x1D6FC && codePoint <= 0x1D714) // MATHEMATICAL ITALIC SMALL ALPHA .. MATHEMATICAL ITALIC SMALL OMEGA return true; - if (codePoint >= 0x1d716 && codePoint <= 0x1d734) + if (codePoint >= 0x1D716 && codePoint <= 0x1D734) // MATHEMATICAL ITALIC EPSILON SYMBOL .. MATHEMATICAL BOLD ITALIC CAPITAL OMEGA return true; - if (codePoint >= 0x1d736 && codePoint <= 0x1d74e) + if (codePoint >= 0x1D736 && codePoint <= 0x1D74E) // MATHEMATICAL BOLD ITALIC SMALL ALPHA .. 
MATHEMATICAL BOLD ITALIC SMALL OMEGA return true; - if (codePoint >= 0x1d750 && codePoint <= 0x1d76e) + if (codePoint >= 0x1D750 && codePoint <= 0x1D76E) // MATHEMATICAL BOLD ITALIC EPSILON SYMBOL .. MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA return true; - if (codePoint >= 0x1d770 && codePoint <= 0x1d788) + if (codePoint >= 0x1D770 && codePoint <= 0x1D788) // MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA .. MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA return true; - if (codePoint >= 0x1d78a && codePoint <= 0x1d7a8) + if (codePoint >= 0x1D78A && codePoint <= 0x1D7A8) // MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL .. MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA return true; - if (codePoint >= 0x1d7aa && codePoint <= 0x1d7c2) + if (codePoint >= 0x1D7AA && codePoint <= 0x1D7C2) // MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA .. MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA return true; - if (codePoint >= 0x1d7c4 && codePoint <= 0x1d7cb) + if (codePoint >= 0x1D7C4 && codePoint <= 0x1D7CB) // MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL .. MATHEMATICAL BOLD SMALL DIGAMMA return true; - if (codePoint >= 0x1e800 && codePoint <= 0x1e8c4) + if (codePoint >= 0x1E800 && codePoint <= 0x1E8C4) // MENDE KIKAKUI SYLLABLE M001 KI .. MENDE KIKAKUI SYLLABLE M060 NYON return true; - if (codePoint >= 0x1e900 && codePoint <= 0x1e943) + if (codePoint >= 0x1E900 && codePoint <= 0x1E943) // ADLAM CAPITAL LETTER ALIF .. ADLAM SMALL LETTER SHA return true; - if (codePoint >= 0x1ee00 && codePoint <= 0x1ee03) + if (codePoint >= 0x1EE00 && codePoint <= 0x1EE03) // ARABIC MATHEMATICAL ALEF .. ARABIC MATHEMATICAL DAL return true; - if (codePoint >= 0x1ee05 && codePoint <= 0x1ee1f) + if (codePoint >= 0x1EE05 && codePoint <= 0x1EE1F) // ARABIC MATHEMATICAL WAW .. ARABIC MATHEMATICAL DOTLESS QAF return true; - if (codePoint >= 0x1ee21 && codePoint <= 0x1ee22) + if (codePoint >= 0x1EE21 && codePoint <= 0x1EE22) // ARABIC MATHEMATICAL INITIAL BEH .. 
ARABIC MATHEMATICAL INITIAL JEEM return true; - if (codePoint >= 0x1ee24 && codePoint <= 0x1ee24) + if (codePoint >= 0x1EE24 && codePoint <= 0x1EE24) // ARABIC MATHEMATICAL INITIAL HEH .. ARABIC MATHEMATICAL INITIAL HEH return true; - if (codePoint >= 0x1ee27 && codePoint <= 0x1ee27) + if (codePoint >= 0x1EE27 && codePoint <= 0x1EE27) // ARABIC MATHEMATICAL INITIAL HAH .. ARABIC MATHEMATICAL INITIAL HAH return true; - if (codePoint >= 0x1ee29 && codePoint <= 0x1ee32) + if (codePoint >= 0x1EE29 && codePoint <= 0x1EE32) // ARABIC MATHEMATICAL INITIAL YEH .. ARABIC MATHEMATICAL INITIAL QAF return true; - if (codePoint >= 0x1ee34 && codePoint <= 0x1ee37) + if (codePoint >= 0x1EE34 && codePoint <= 0x1EE37) // ARABIC MATHEMATICAL INITIAL SHEEN .. ARABIC MATHEMATICAL INITIAL KHAH return true; - if (codePoint >= 0x1ee39 && codePoint <= 0x1ee39) + if (codePoint >= 0x1EE39 && codePoint <= 0x1EE39) // ARABIC MATHEMATICAL INITIAL DAD .. ARABIC MATHEMATICAL INITIAL DAD return true; - if (codePoint >= 0x1ee3b && codePoint <= 0x1ee3b) + if (codePoint >= 0x1EE3B && codePoint <= 0x1EE3B) // ARABIC MATHEMATICAL INITIAL GHAIN .. ARABIC MATHEMATICAL INITIAL GHAIN return true; - if (codePoint >= 0x1ee42 && codePoint <= 0x1ee42) + if (codePoint >= 0x1EE42 && codePoint <= 0x1EE42) // ARABIC MATHEMATICAL TAILED JEEM .. ARABIC MATHEMATICAL TAILED JEEM return true; - if (codePoint >= 0x1ee47 && codePoint <= 0x1ee47) + if (codePoint >= 0x1EE47 && codePoint <= 0x1EE47) // ARABIC MATHEMATICAL TAILED HAH .. ARABIC MATHEMATICAL TAILED HAH return true; - if (codePoint >= 0x1ee49 && codePoint <= 0x1ee49) + if (codePoint >= 0x1EE49 && codePoint <= 0x1EE49) // ARABIC MATHEMATICAL TAILED YEH .. ARABIC MATHEMATICAL TAILED YEH return true; - if (codePoint >= 0x1ee4b && codePoint <= 0x1ee4b) + if (codePoint >= 0x1EE4B && codePoint <= 0x1EE4B) // ARABIC MATHEMATICAL TAILED LAM .. 
ARABIC MATHEMATICAL TAILED LAM return true; - if (codePoint >= 0x1ee4d && codePoint <= 0x1ee4f) + if (codePoint >= 0x1EE4D && codePoint <= 0x1EE4F) // ARABIC MATHEMATICAL TAILED NOON .. ARABIC MATHEMATICAL TAILED AIN return true; - if (codePoint >= 0x1ee51 && codePoint <= 0x1ee52) + if (codePoint >= 0x1EE51 && codePoint <= 0x1EE52) // ARABIC MATHEMATICAL TAILED SAD .. ARABIC MATHEMATICAL TAILED QAF return true; - if (codePoint >= 0x1ee54 && codePoint <= 0x1ee54) + if (codePoint >= 0x1EE54 && codePoint <= 0x1EE54) // ARABIC MATHEMATICAL TAILED SHEEN .. ARABIC MATHEMATICAL TAILED SHEEN return true; - if (codePoint >= 0x1ee57 && codePoint <= 0x1ee57) + if (codePoint >= 0x1EE57 && codePoint <= 0x1EE57) // ARABIC MATHEMATICAL TAILED KHAH .. ARABIC MATHEMATICAL TAILED KHAH return true; - if (codePoint >= 0x1ee59 && codePoint <= 0x1ee59) + if (codePoint >= 0x1EE59 && codePoint <= 0x1EE59) // ARABIC MATHEMATICAL TAILED DAD .. ARABIC MATHEMATICAL TAILED DAD return true; - if (codePoint >= 0x1ee5b && codePoint <= 0x1ee5b) + if (codePoint >= 0x1EE5B && codePoint <= 0x1EE5B) // ARABIC MATHEMATICAL TAILED GHAIN .. ARABIC MATHEMATICAL TAILED GHAIN return true; - if (codePoint >= 0x1ee5d && codePoint <= 0x1ee5d) + if (codePoint >= 0x1EE5D && codePoint <= 0x1EE5D) // ARABIC MATHEMATICAL TAILED DOTLESS NOON .. ARABIC MATHEMATICAL TAILED DOTLESS NOON return true; - if (codePoint >= 0x1ee5f && codePoint <= 0x1ee5f) + if (codePoint >= 0x1EE5F && codePoint <= 0x1EE5F) // ARABIC MATHEMATICAL TAILED DOTLESS QAF .. ARABIC MATHEMATICAL TAILED DOTLESS QAF return true; - if (codePoint >= 0x1ee61 && codePoint <= 0x1ee62) + if (codePoint >= 0x1EE61 && codePoint <= 0x1EE62) // ARABIC MATHEMATICAL STRETCHED BEH .. ARABIC MATHEMATICAL STRETCHED JEEM return true; - if (codePoint >= 0x1ee64 && codePoint <= 0x1ee64) + if (codePoint >= 0x1EE64 && codePoint <= 0x1EE64) // ARABIC MATHEMATICAL STRETCHED HEH .. 
ARABIC MATHEMATICAL STRETCHED HEH return true; - if (codePoint >= 0x1ee67 && codePoint <= 0x1ee6a) + if (codePoint >= 0x1EE67 && codePoint <= 0x1EE6A) // ARABIC MATHEMATICAL STRETCHED HAH .. ARABIC MATHEMATICAL STRETCHED KAF return true; - if (codePoint >= 0x1ee6c && codePoint <= 0x1ee72) + if (codePoint >= 0x1EE6C && codePoint <= 0x1EE72) // ARABIC MATHEMATICAL STRETCHED MEEM .. ARABIC MATHEMATICAL STRETCHED QAF return true; - if (codePoint >= 0x1ee74 && codePoint <= 0x1ee77) + if (codePoint >= 0x1EE74 && codePoint <= 0x1EE77) // ARABIC MATHEMATICAL STRETCHED SHEEN .. ARABIC MATHEMATICAL STRETCHED KHAH return true; - if (codePoint >= 0x1ee79 && codePoint <= 0x1ee7c) + if (codePoint >= 0x1EE79 && codePoint <= 0x1EE7C) // ARABIC MATHEMATICAL STRETCHED DAD .. ARABIC MATHEMATICAL STRETCHED DOTLESS BEH return true; - if (codePoint >= 0x1ee7e && codePoint <= 0x1ee7e) + if (codePoint >= 0x1EE7E && codePoint <= 0x1EE7E) // ARABIC MATHEMATICAL STRETCHED DOTLESS FEH .. ARABIC MATHEMATICAL STRETCHED DOTLESS FEH return true; - if (codePoint >= 0x1ee80 && codePoint <= 0x1ee89) + if (codePoint >= 0x1EE80 && codePoint <= 0x1EE89) // ARABIC MATHEMATICAL LOOPED ALEF .. ARABIC MATHEMATICAL LOOPED YEH return true; - if (codePoint >= 0x1ee8b && codePoint <= 0x1ee9b) + if (codePoint >= 0x1EE8B && codePoint <= 0x1EE9B) // ARABIC MATHEMATICAL LOOPED LAM .. ARABIC MATHEMATICAL LOOPED GHAIN return true; - if (codePoint >= 0x1eea1 && codePoint <= 0x1eea3) + if (codePoint >= 0x1EEA1 && codePoint <= 0x1EEA3) // ARABIC MATHEMATICAL DOUBLE-STRUCK BEH .. ARABIC MATHEMATICAL DOUBLE-STRUCK DAL return true; - if (codePoint >= 0x1eea5 && codePoint <= 0x1eea9) + if (codePoint >= 0x1EEA5 && codePoint <= 0x1EEA9) // ARABIC MATHEMATICAL DOUBLE-STRUCK WAW .. ARABIC MATHEMATICAL DOUBLE-STRUCK YEH return true; - if (codePoint >= 0x1eeab && codePoint <= 0x1eebb) + if (codePoint >= 0x1EEAB && codePoint <= 0x1EEBB) // ARABIC MATHEMATICAL DOUBLE-STRUCK LAM .. 
ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN return true; - if (codePoint >= 0x20000 && codePoint <= 0x2a6d6) + if (codePoint >= 0x20000 && codePoint <= 0x2A6D6) // CJK Ideograph Extension B .. CJK Ideograph Extension B return true; - if (codePoint >= 0x2a700 && codePoint <= 0x2b734) + if (codePoint >= 0x2A700 && codePoint <= 0x2B734) // CJK Ideograph Extension C .. CJK Ideograph Extension C return true; - if (codePoint >= 0x2b740 && codePoint <= 0x2b81d) + if (codePoint >= 0x2B740 && codePoint <= 0x2B81D) // CJK Ideograph Extension D .. CJK Ideograph Extension D return true; - if (codePoint >= 0x2b820 && codePoint <= 0x2cea1) + if (codePoint >= 0x2B820 && codePoint <= 0x2CEA1) // CJK Ideograph Extension E .. CJK Ideograph Extension E return true; - if (codePoint >= 0x2ceb0 && codePoint <= 0x2ebe0) + if (codePoint >= 0x2CEB0 && codePoint <= 0x2EBE0) // CJK Ideograph Extension F .. CJK Ideograph Extension F return true; - if (codePoint >= 0x2f800 && codePoint <= 0x2fa1d) + if (codePoint >= 0x2F800 && codePoint <= 0x2FA1D) // CJK COMPATIBILITY IDEOGRAPH-2F800 .. CJK COMPATIBILITY IDEOGRAPH-2FA1D return true; return false; } @@ -2196,549 +2196,1149 @@ js::unicode::IsIdentifierStartNonBMP(uint32_t codePoint) bool js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint) { - if (codePoint >= 0x10000 && codePoint <= 0x1000b) + if (codePoint >= 0x10000 && codePoint <= 0x1000B) // LINEAR B SYLLABLE B008 A .. LINEAR B SYLLABLE B046 JE return true; - if (codePoint >= 0x1000d && codePoint <= 0x10026) + if (codePoint >= 0x1000D && codePoint <= 0x10026) // LINEAR B SYLLABLE B036 JO .. LINEAR B SYLLABLE B032 QO return true; - if (codePoint >= 0x10028 && codePoint <= 0x1003a) + if (codePoint >= 0x10028 && codePoint <= 0x1003A) // LINEAR B SYLLABLE B060 RA .. LINEAR B SYLLABLE B042 WO return true; - if (codePoint >= 0x1003c && codePoint <= 0x1003d) + if (codePoint >= 0x1003C && codePoint <= 0x1003D) // LINEAR B SYLLABLE B017 ZA .. 
LINEAR B SYLLABLE B074 ZE return true; - if (codePoint >= 0x1003f && codePoint <= 0x1004d) + if (codePoint >= 0x1003F && codePoint <= 0x1004D) // LINEAR B SYLLABLE B020 ZO .. LINEAR B SYLLABLE B091 TWO return true; - if (codePoint >= 0x10050 && codePoint <= 0x1005d) + if (codePoint >= 0x10050 && codePoint <= 0x1005D) // LINEAR B SYMBOL B018 .. LINEAR B SYMBOL B089 return true; - if (codePoint >= 0x10080 && codePoint <= 0x100fa) + if (codePoint >= 0x10080 && codePoint <= 0x100FA) // LINEAR B IDEOGRAM B100 MAN .. LINEAR B IDEOGRAM VESSEL B305 return true; - if (codePoint >= 0x10140 && codePoint <= 0x10174) + if (codePoint >= 0x10140 && codePoint <= 0x10174) // GREEK ACROPHONIC ATTIC ONE QUARTER .. GREEK ACROPHONIC STRATIAN FIFTY MNAS return true; - if (codePoint >= 0x101fd && codePoint <= 0x101fd) + if (codePoint >= 0x101FD && codePoint <= 0x101FD) // PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE .. PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE return true; - if (codePoint >= 0x10280 && codePoint <= 0x1029c) + if (codePoint >= 0x10280 && codePoint <= 0x1029C) // LYCIAN LETTER A .. LYCIAN LETTER X return true; - if (codePoint >= 0x102a0 && codePoint <= 0x102d0) + if (codePoint >= 0x102A0 && codePoint <= 0x102D0) // CARIAN LETTER A .. CARIAN LETTER UUU3 return true; - if (codePoint >= 0x102e0 && codePoint <= 0x102e0) + if (codePoint >= 0x102E0 && codePoint <= 0x102E0) // COPTIC EPACT THOUSANDS MARK .. COPTIC EPACT THOUSANDS MARK return true; - if (codePoint >= 0x10300 && codePoint <= 0x1031f) + if (codePoint >= 0x10300 && codePoint <= 0x1031F) // OLD ITALIC LETTER A .. OLD ITALIC LETTER ESS return true; - if (codePoint >= 0x1032d && codePoint <= 0x1034a) + if (codePoint >= 0x1032D && codePoint <= 0x1034A) // OLD ITALIC LETTER YE .. GOTHIC LETTER NINE HUNDRED return true; - if (codePoint >= 0x10350 && codePoint <= 0x1037a) + if (codePoint >= 0x10350 && codePoint <= 0x1037A) // OLD PERMIC LETTER AN .. 
COMBINING OLD PERMIC LETTER SII return true; - if (codePoint >= 0x10380 && codePoint <= 0x1039d) + if (codePoint >= 0x10380 && codePoint <= 0x1039D) // UGARITIC LETTER ALPA .. UGARITIC LETTER SSU return true; - if (codePoint >= 0x103a0 && codePoint <= 0x103c3) + if (codePoint >= 0x103A0 && codePoint <= 0x103C3) // OLD PERSIAN SIGN A .. OLD PERSIAN SIGN HA return true; - if (codePoint >= 0x103c8 && codePoint <= 0x103cf) + if (codePoint >= 0x103C8 && codePoint <= 0x103CF) // OLD PERSIAN SIGN AURAMAZDAA .. OLD PERSIAN SIGN BUUMISH return true; - if (codePoint >= 0x103d1 && codePoint <= 0x103d5) + if (codePoint >= 0x103D1 && codePoint <= 0x103D5) // OLD PERSIAN NUMBER ONE .. OLD PERSIAN NUMBER HUNDRED return true; - if (codePoint >= 0x10400 && codePoint <= 0x1049d) + if (codePoint >= 0x10400 && codePoint <= 0x1049D) // DESERET CAPITAL LETTER LONG I .. OSMANYA LETTER OO return true; - if (codePoint >= 0x104a0 && codePoint <= 0x104a9) + if (codePoint >= 0x104A0 && codePoint <= 0x104A9) // OSMANYA DIGIT ZERO .. OSMANYA DIGIT NINE return true; - if (codePoint >= 0x104b0 && codePoint <= 0x104d3) + if (codePoint >= 0x104B0 && codePoint <= 0x104D3) // OSAGE CAPITAL LETTER A .. OSAGE CAPITAL LETTER ZHA return true; - if (codePoint >= 0x104d8 && codePoint <= 0x104fb) + if (codePoint >= 0x104D8 && codePoint <= 0x104FB) // OSAGE SMALL LETTER A .. OSAGE SMALL LETTER ZHA return true; - if (codePoint >= 0x10500 && codePoint <= 0x10527) + if (codePoint >= 0x10500 && codePoint <= 0x10527) // ELBASAN LETTER A .. ELBASAN LETTER KHE return true; - if (codePoint >= 0x10530 && codePoint <= 0x10563) + if (codePoint >= 0x10530 && codePoint <= 0x10563) // CAUCASIAN ALBANIAN LETTER ALT .. CAUCASIAN ALBANIAN LETTER KIW return true; - if (codePoint >= 0x10600 && codePoint <= 0x10736) + if (codePoint >= 0x10600 && codePoint <= 0x10736) // LINEAR A SIGN AB001 .. 
LINEAR A SIGN A664 return true; - if (codePoint >= 0x10740 && codePoint <= 0x10755) + if (codePoint >= 0x10740 && codePoint <= 0x10755) // LINEAR A SIGN A701 A .. LINEAR A SIGN A732 JE return true; - if (codePoint >= 0x10760 && codePoint <= 0x10767) + if (codePoint >= 0x10760 && codePoint <= 0x10767) // LINEAR A SIGN A800 .. LINEAR A SIGN A807 return true; - if (codePoint >= 0x10800 && codePoint <= 0x10805) + if (codePoint >= 0x10800 && codePoint <= 0x10805) // CYPRIOT SYLLABLE A .. CYPRIOT SYLLABLE JA return true; - if (codePoint >= 0x10808 && codePoint <= 0x10808) + if (codePoint >= 0x10808 && codePoint <= 0x10808) // CYPRIOT SYLLABLE JO .. CYPRIOT SYLLABLE JO return true; - if (codePoint >= 0x1080a && codePoint <= 0x10835) + if (codePoint >= 0x1080A && codePoint <= 0x10835) // CYPRIOT SYLLABLE KA .. CYPRIOT SYLLABLE WO return true; - if (codePoint >= 0x10837 && codePoint <= 0x10838) + if (codePoint >= 0x10837 && codePoint <= 0x10838) // CYPRIOT SYLLABLE XA .. CYPRIOT SYLLABLE XE return true; - if (codePoint >= 0x1083c && codePoint <= 0x1083c) + if (codePoint >= 0x1083C && codePoint <= 0x1083C) // CYPRIOT SYLLABLE ZA .. CYPRIOT SYLLABLE ZA return true; - if (codePoint >= 0x1083f && codePoint <= 0x10855) + if (codePoint >= 0x1083F && codePoint <= 0x10855) // CYPRIOT SYLLABLE ZO .. IMPERIAL ARAMAIC LETTER TAW return true; - if (codePoint >= 0x10860 && codePoint <= 0x10876) + if (codePoint >= 0x10860 && codePoint <= 0x10876) // PALMYRENE LETTER ALEPH .. PALMYRENE LETTER TAW return true; - if (codePoint >= 0x10880 && codePoint <= 0x1089e) + if (codePoint >= 0x10880 && codePoint <= 0x1089E) // NABATAEAN LETTER FINAL ALEPH .. NABATAEAN LETTER TAW return true; - if (codePoint >= 0x108e0 && codePoint <= 0x108f2) + if (codePoint >= 0x108E0 && codePoint <= 0x108F2) // HATRAN LETTER ALEPH .. HATRAN LETTER QOPH return true; - if (codePoint >= 0x108f4 && codePoint <= 0x108f5) + if (codePoint >= 0x108F4 && codePoint <= 0x108F5) // HATRAN LETTER SHIN .. 
HATRAN LETTER TAW return true; - if (codePoint >= 0x10900 && codePoint <= 0x10915) + if (codePoint >= 0x10900 && codePoint <= 0x10915) // PHOENICIAN LETTER ALF .. PHOENICIAN LETTER TAU return true; - if (codePoint >= 0x10920 && codePoint <= 0x10939) + if (codePoint >= 0x10920 && codePoint <= 0x10939) // LYDIAN LETTER A .. LYDIAN LETTER C return true; - if (codePoint >= 0x10980 && codePoint <= 0x109b7) + if (codePoint >= 0x10980 && codePoint <= 0x109B7) // MEROITIC HIEROGLYPHIC LETTER A .. MEROITIC CURSIVE LETTER DA return true; - if (codePoint >= 0x109be && codePoint <= 0x109bf) + if (codePoint >= 0x109BE && codePoint <= 0x109BF) // MEROITIC CURSIVE LOGOGRAM RMT .. MEROITIC CURSIVE LOGOGRAM IMN return true; - if (codePoint >= 0x10a00 && codePoint <= 0x10a03) + if (codePoint >= 0x10A00 && codePoint <= 0x10A03) // KHAROSHTHI LETTER A .. KHAROSHTHI VOWEL SIGN VOCALIC R return true; - if (codePoint >= 0x10a05 && codePoint <= 0x10a06) + if (codePoint >= 0x10A05 && codePoint <= 0x10A06) // KHAROSHTHI VOWEL SIGN E .. KHAROSHTHI VOWEL SIGN O return true; - if (codePoint >= 0x10a0c && codePoint <= 0x10a13) + if (codePoint >= 0x10A0C && codePoint <= 0x10A13) // KHAROSHTHI VOWEL LENGTH MARK .. KHAROSHTHI LETTER GHA return true; - if (codePoint >= 0x10a15 && codePoint <= 0x10a17) + if (codePoint >= 0x10A15 && codePoint <= 0x10A17) // KHAROSHTHI LETTER CA .. KHAROSHTHI LETTER JA return true; - if (codePoint >= 0x10a19 && codePoint <= 0x10a35) + if (codePoint >= 0x10A19 && codePoint <= 0x10A35) // KHAROSHTHI LETTER NYA .. KHAROSHTHI LETTER VHA return true; - if (codePoint >= 0x10a38 && codePoint <= 0x10a3a) + if (codePoint >= 0x10A38 && codePoint <= 0x10A3A) // KHAROSHTHI SIGN BAR ABOVE .. KHAROSHTHI SIGN DOT BELOW return true; - if (codePoint >= 0x10a3f && codePoint <= 0x10a3f) + if (codePoint >= 0x10A3F && codePoint <= 0x10A3F) // KHAROSHTHI VIRAMA .. 
KHAROSHTHI VIRAMA return true; - if (codePoint >= 0x10a60 && codePoint <= 0x10a7c) + if (codePoint >= 0x10A60 && codePoint <= 0x10A7C) // OLD SOUTH ARABIAN LETTER HE .. OLD SOUTH ARABIAN LETTER THETH return true; - if (codePoint >= 0x10a80 && codePoint <= 0x10a9c) + if (codePoint >= 0x10A80 && codePoint <= 0x10A9C) // OLD NORTH ARABIAN LETTER HEH .. OLD NORTH ARABIAN LETTER ZAH return true; - if (codePoint >= 0x10ac0 && codePoint <= 0x10ac7) + if (codePoint >= 0x10AC0 && codePoint <= 0x10AC7) // MANICHAEAN LETTER ALEPH .. MANICHAEAN LETTER WAW return true; - if (codePoint >= 0x10ac9 && codePoint <= 0x10ae6) + if (codePoint >= 0x10AC9 && codePoint <= 0x10AE6) // MANICHAEAN LETTER ZAYIN .. MANICHAEAN ABBREVIATION MARK BELOW return true; - if (codePoint >= 0x10b00 && codePoint <= 0x10b35) + if (codePoint >= 0x10B00 && codePoint <= 0x10B35) // AVESTAN LETTER A .. AVESTAN LETTER HE return true; - if (codePoint >= 0x10b40 && codePoint <= 0x10b55) + if (codePoint >= 0x10B40 && codePoint <= 0x10B55) // INSCRIPTIONAL PARTHIAN LETTER ALEPH .. INSCRIPTIONAL PARTHIAN LETTER TAW return true; - if (codePoint >= 0x10b60 && codePoint <= 0x10b72) + if (codePoint >= 0x10B60 && codePoint <= 0x10B72) // INSCRIPTIONAL PAHLAVI LETTER ALEPH .. INSCRIPTIONAL PAHLAVI LETTER TAW return true; - if (codePoint >= 0x10b80 && codePoint <= 0x10b91) + if (codePoint >= 0x10B80 && codePoint <= 0x10B91) // PSALTER PAHLAVI LETTER ALEPH .. PSALTER PAHLAVI LETTER TAW return true; - if (codePoint >= 0x10c00 && codePoint <= 0x10c48) + if (codePoint >= 0x10C00 && codePoint <= 0x10C48) // OLD TURKIC LETTER ORKHON A .. OLD TURKIC LETTER ORKHON BASH return true; - if (codePoint >= 0x10c80 && codePoint <= 0x10cb2) + if (codePoint >= 0x10C80 && codePoint <= 0x10CB2) // OLD HUNGARIAN CAPITAL LETTER A .. OLD HUNGARIAN CAPITAL LETTER US return true; - if (codePoint >= 0x10cc0 && codePoint <= 0x10cf2) + if (codePoint >= 0x10CC0 && codePoint <= 0x10CF2) // OLD HUNGARIAN SMALL LETTER A .. 
OLD HUNGARIAN SMALL LETTER US return true; - if (codePoint >= 0x10d00 && codePoint <= 0x10d27) + if (codePoint >= 0x10D00 && codePoint <= 0x10D27) // HANIFI ROHINGYA LETTER A .. HANIFI ROHINGYA SIGN TASSI return true; - if (codePoint >= 0x10d30 && codePoint <= 0x10d39) + if (codePoint >= 0x10D30 && codePoint <= 0x10D39) // HANIFI ROHINGYA DIGIT ZERO .. HANIFI ROHINGYA DIGIT NINE return true; - if (codePoint >= 0x10f00 && codePoint <= 0x10f1c) + if (codePoint >= 0x10F00 && codePoint <= 0x10F1C) // OLD SOGDIAN LETTER ALEPH .. OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL return true; - if (codePoint >= 0x10f27 && codePoint <= 0x10f27) + if (codePoint >= 0x10F27 && codePoint <= 0x10F27) // OLD SOGDIAN LIGATURE AYIN-DALETH .. OLD SOGDIAN LIGATURE AYIN-DALETH return true; - if (codePoint >= 0x10f30 && codePoint <= 0x10f50) + if (codePoint >= 0x10F30 && codePoint <= 0x10F50) // SOGDIAN LETTER ALEPH .. SOGDIAN COMBINING STROKE BELOW return true; - if (codePoint >= 0x11000 && codePoint <= 0x11046) + if (codePoint >= 0x11000 && codePoint <= 0x11046) // BRAHMI SIGN CANDRABINDU .. BRAHMI VIRAMA return true; - if (codePoint >= 0x11066 && codePoint <= 0x1106f) + if (codePoint >= 0x11066 && codePoint <= 0x1106F) // BRAHMI DIGIT ZERO .. BRAHMI DIGIT NINE return true; - if (codePoint >= 0x1107f && codePoint <= 0x110ba) + if (codePoint >= 0x1107F && codePoint <= 0x110BA) // BRAHMI NUMBER JOINER .. KAITHI SIGN NUKTA return true; - if (codePoint >= 0x110d0 && codePoint <= 0x110e8) + if (codePoint >= 0x110D0 && codePoint <= 0x110E8) // SORA SOMPENG LETTER SAH .. SORA SOMPENG LETTER MAE return true; - if (codePoint >= 0x110f0 && codePoint <= 0x110f9) + if (codePoint >= 0x110F0 && codePoint <= 0x110F9) // SORA SOMPENG DIGIT ZERO .. SORA SOMPENG DIGIT NINE return true; - if (codePoint >= 0x11100 && codePoint <= 0x11134) + if (codePoint >= 0x11100 && codePoint <= 0x11134) // CHAKMA SIGN CANDRABINDU .. 
CHAKMA MAAYYAA return true; - if (codePoint >= 0x11136 && codePoint <= 0x1113f) + if (codePoint >= 0x11136 && codePoint <= 0x1113F) // CHAKMA DIGIT ZERO .. CHAKMA DIGIT NINE return true; - if (codePoint >= 0x11144 && codePoint <= 0x11146) + if (codePoint >= 0x11144 && codePoint <= 0x11146) // CHAKMA LETTER LHAA .. CHAKMA VOWEL SIGN EI return true; - if (codePoint >= 0x11150 && codePoint <= 0x11173) + if (codePoint >= 0x11150 && codePoint <= 0x11173) // MAHAJANI LETTER A .. MAHAJANI SIGN NUKTA return true; - if (codePoint >= 0x11176 && codePoint <= 0x11176) + if (codePoint >= 0x11176 && codePoint <= 0x11176) // MAHAJANI LIGATURE SHRI .. MAHAJANI LIGATURE SHRI return true; - if (codePoint >= 0x11180 && codePoint <= 0x111c4) + if (codePoint >= 0x11180 && codePoint <= 0x111C4) // SHARADA SIGN CANDRABINDU .. SHARADA OM return true; - if (codePoint >= 0x111c9 && codePoint <= 0x111cc) + if (codePoint >= 0x111C9 && codePoint <= 0x111CC) // SHARADA SANDHI MARK .. SHARADA EXTRA SHORT VOWEL MARK return true; - if (codePoint >= 0x111d0 && codePoint <= 0x111da) + if (codePoint >= 0x111D0 && codePoint <= 0x111DA) // SHARADA DIGIT ZERO .. SHARADA EKAM return true; - if (codePoint >= 0x111dc && codePoint <= 0x111dc) + if (codePoint >= 0x111DC && codePoint <= 0x111DC) // SHARADA HEADSTROKE .. SHARADA HEADSTROKE return true; - if (codePoint >= 0x11200 && codePoint <= 0x11211) + if (codePoint >= 0x11200 && codePoint <= 0x11211) // KHOJKI LETTER A .. KHOJKI LETTER JJA return true; - if (codePoint >= 0x11213 && codePoint <= 0x11237) + if (codePoint >= 0x11213 && codePoint <= 0x11237) // KHOJKI LETTER NYA .. KHOJKI SIGN SHADDA return true; - if (codePoint >= 0x1123e && codePoint <= 0x1123e) + if (codePoint >= 0x1123E && codePoint <= 0x1123E) // KHOJKI SIGN SUKUN .. KHOJKI SIGN SUKUN return true; - if (codePoint >= 0x11280 && codePoint <= 0x11286) + if (codePoint >= 0x11280 && codePoint <= 0x11286) // MULTANI LETTER A .. 
MULTANI LETTER GA return true; - if (codePoint >= 0x11288 && codePoint <= 0x11288) + if (codePoint >= 0x11288 && codePoint <= 0x11288) // MULTANI LETTER GHA .. MULTANI LETTER GHA return true; - if (codePoint >= 0x1128a && codePoint <= 0x1128d) + if (codePoint >= 0x1128A && codePoint <= 0x1128D) // MULTANI LETTER CA .. MULTANI LETTER JJA return true; - if (codePoint >= 0x1128f && codePoint <= 0x1129d) + if (codePoint >= 0x1128F && codePoint <= 0x1129D) // MULTANI LETTER NYA .. MULTANI LETTER BA return true; - if (codePoint >= 0x1129f && codePoint <= 0x112a8) + if (codePoint >= 0x1129F && codePoint <= 0x112A8) // MULTANI LETTER BHA .. MULTANI LETTER RHA return true; - if (codePoint >= 0x112b0 && codePoint <= 0x112ea) + if (codePoint >= 0x112B0 && codePoint <= 0x112EA) // KHUDAWADI LETTER A .. KHUDAWADI SIGN VIRAMA return true; - if (codePoint >= 0x112f0 && codePoint <= 0x112f9) + if (codePoint >= 0x112F0 && codePoint <= 0x112F9) // KHUDAWADI DIGIT ZERO .. KHUDAWADI DIGIT NINE return true; - if (codePoint >= 0x11300 && codePoint <= 0x11303) + if (codePoint >= 0x11300 && codePoint <= 0x11303) // GRANTHA SIGN COMBINING ANUSVARA ABOVE .. GRANTHA SIGN VISARGA return true; - if (codePoint >= 0x11305 && codePoint <= 0x1130c) + if (codePoint >= 0x11305 && codePoint <= 0x1130C) // GRANTHA LETTER A .. GRANTHA LETTER VOCALIC L return true; - if (codePoint >= 0x1130f && codePoint <= 0x11310) + if (codePoint >= 0x1130F && codePoint <= 0x11310) // GRANTHA LETTER EE .. GRANTHA LETTER AI return true; - if (codePoint >= 0x11313 && codePoint <= 0x11328) + if (codePoint >= 0x11313 && codePoint <= 0x11328) // GRANTHA LETTER OO .. GRANTHA LETTER NA return true; - if (codePoint >= 0x1132a && codePoint <= 0x11330) + if (codePoint >= 0x1132A && codePoint <= 0x11330) // GRANTHA LETTER PA .. GRANTHA LETTER RA return true; - if (codePoint >= 0x11332 && codePoint <= 0x11333) + if (codePoint >= 0x11332 && codePoint <= 0x11333) // GRANTHA LETTER LA .. 
GRANTHA LETTER LLA return true; - if (codePoint >= 0x11335 && codePoint <= 0x11339) + if (codePoint >= 0x11335 && codePoint <= 0x11339) // GRANTHA LETTER VA .. GRANTHA LETTER HA return true; - if (codePoint >= 0x1133b && codePoint <= 0x11344) + if (codePoint >= 0x1133B && codePoint <= 0x11344) // COMBINING BINDU BELOW .. GRANTHA VOWEL SIGN VOCALIC RR return true; - if (codePoint >= 0x11347 && codePoint <= 0x11348) + if (codePoint >= 0x11347 && codePoint <= 0x11348) // GRANTHA VOWEL SIGN EE .. GRANTHA VOWEL SIGN AI return true; - if (codePoint >= 0x1134b && codePoint <= 0x1134d) + if (codePoint >= 0x1134B && codePoint <= 0x1134D) // GRANTHA VOWEL SIGN OO .. GRANTHA SIGN VIRAMA return true; - if (codePoint >= 0x11350 && codePoint <= 0x11350) + if (codePoint >= 0x11350 && codePoint <= 0x11350) // GRANTHA OM .. GRANTHA OM return true; - if (codePoint >= 0x11357 && codePoint <= 0x11357) + if (codePoint >= 0x11357 && codePoint <= 0x11357) // GRANTHA AU LENGTH MARK .. GRANTHA AU LENGTH MARK return true; - if (codePoint >= 0x1135d && codePoint <= 0x11363) + if (codePoint >= 0x1135D && codePoint <= 0x11363) // GRANTHA SIGN PLUTA .. GRANTHA VOWEL SIGN VOCALIC LL return true; - if (codePoint >= 0x11366 && codePoint <= 0x1136c) + if (codePoint >= 0x11366 && codePoint <= 0x1136C) // COMBINING GRANTHA DIGIT ZERO .. COMBINING GRANTHA DIGIT SIX return true; - if (codePoint >= 0x11370 && codePoint <= 0x11374) + if (codePoint >= 0x11370 && codePoint <= 0x11374) // COMBINING GRANTHA LETTER A .. COMBINING GRANTHA LETTER PA return true; - if (codePoint >= 0x11400 && codePoint <= 0x1144a) + if (codePoint >= 0x11400 && codePoint <= 0x1144A) // NEWA LETTER A .. NEWA SIDDHI return true; - if (codePoint >= 0x11450 && codePoint <= 0x11459) + if (codePoint >= 0x11450 && codePoint <= 0x11459) // NEWA DIGIT ZERO .. NEWA DIGIT NINE return true; - if (codePoint >= 0x1145e && codePoint <= 0x1145e) + if (codePoint >= 0x1145E && codePoint <= 0x1145E) // NEWA SANDHI MARK .. 
NEWA SANDHI MARK return true; - if (codePoint >= 0x11480 && codePoint <= 0x114c5) + if (codePoint >= 0x11480 && codePoint <= 0x114C5) // TIRHUTA ANJI .. TIRHUTA GVANG return true; - if (codePoint >= 0x114c7 && codePoint <= 0x114c7) + if (codePoint >= 0x114C7 && codePoint <= 0x114C7) // TIRHUTA OM .. TIRHUTA OM return true; - if (codePoint >= 0x114d0 && codePoint <= 0x114d9) + if (codePoint >= 0x114D0 && codePoint <= 0x114D9) // TIRHUTA DIGIT ZERO .. TIRHUTA DIGIT NINE return true; - if (codePoint >= 0x11580 && codePoint <= 0x115b5) + if (codePoint >= 0x11580 && codePoint <= 0x115B5) // SIDDHAM LETTER A .. SIDDHAM VOWEL SIGN VOCALIC RR return true; - if (codePoint >= 0x115b8 && codePoint <= 0x115c0) + if (codePoint >= 0x115B8 && codePoint <= 0x115C0) // SIDDHAM VOWEL SIGN E .. SIDDHAM SIGN NUKTA return true; - if (codePoint >= 0x115d8 && codePoint <= 0x115dd) + if (codePoint >= 0x115D8 && codePoint <= 0x115DD) // SIDDHAM LETTER THREE-CIRCLE ALTERNATE I .. SIDDHAM VOWEL SIGN ALTERNATE UU return true; - if (codePoint >= 0x11600 && codePoint <= 0x11640) + if (codePoint >= 0x11600 && codePoint <= 0x11640) // MODI LETTER A .. MODI SIGN ARDHACANDRA return true; - if (codePoint >= 0x11644 && codePoint <= 0x11644) + if (codePoint >= 0x11644 && codePoint <= 0x11644) // MODI SIGN HUVA .. MODI SIGN HUVA return true; - if (codePoint >= 0x11650 && codePoint <= 0x11659) + if (codePoint >= 0x11650 && codePoint <= 0x11659) // MODI DIGIT ZERO .. MODI DIGIT NINE return true; - if (codePoint >= 0x11680 && codePoint <= 0x116b7) + if (codePoint >= 0x11680 && codePoint <= 0x116B7) // TAKRI LETTER A .. TAKRI SIGN NUKTA return true; - if (codePoint >= 0x116c0 && codePoint <= 0x116c9) + if (codePoint >= 0x116C0 && codePoint <= 0x116C9) // TAKRI DIGIT ZERO .. TAKRI DIGIT NINE return true; - if (codePoint >= 0x11700 && codePoint <= 0x1171a) + if (codePoint >= 0x11700 && codePoint <= 0x1171A) // AHOM LETTER KA .. 
AHOM LETTER ALTERNATE BA return true; - if (codePoint >= 0x1171d && codePoint <= 0x1172b) + if (codePoint >= 0x1171D && codePoint <= 0x1172B) // AHOM CONSONANT SIGN MEDIAL LA .. AHOM SIGN KILLER return true; - if (codePoint >= 0x11730 && codePoint <= 0x11739) + if (codePoint >= 0x11730 && codePoint <= 0x11739) // AHOM DIGIT ZERO .. AHOM DIGIT NINE return true; - if (codePoint >= 0x11800 && codePoint <= 0x1183a) + if (codePoint >= 0x11800 && codePoint <= 0x1183A) // DOGRA LETTER A .. DOGRA SIGN NUKTA return true; - if (codePoint >= 0x118a0 && codePoint <= 0x118e9) + if (codePoint >= 0x118A0 && codePoint <= 0x118E9) // WARANG CITI CAPITAL LETTER NGAA .. WARANG CITI DIGIT NINE return true; - if (codePoint >= 0x118ff && codePoint <= 0x118ff) + if (codePoint >= 0x118FF && codePoint <= 0x118FF) // WARANG CITI OM .. WARANG CITI OM return true; - if (codePoint >= 0x11a00 && codePoint <= 0x11a3e) + if (codePoint >= 0x11A00 && codePoint <= 0x11A3E) // ZANABAZAR SQUARE LETTER A .. ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA return true; - if (codePoint >= 0x11a47 && codePoint <= 0x11a47) + if (codePoint >= 0x11A47 && codePoint <= 0x11A47) // ZANABAZAR SQUARE SUBJOINER .. ZANABAZAR SQUARE SUBJOINER return true; - if (codePoint >= 0x11a50 && codePoint <= 0x11a83) + if (codePoint >= 0x11A50 && codePoint <= 0x11A83) // SOYOMBO LETTER A .. SOYOMBO LETTER KSSA return true; - if (codePoint >= 0x11a86 && codePoint <= 0x11a99) + if (codePoint >= 0x11A86 && codePoint <= 0x11A99) // SOYOMBO CLUSTER-INITIAL LETTER RA .. SOYOMBO SUBJOINER return true; - if (codePoint >= 0x11a9d && codePoint <= 0x11a9d) + if (codePoint >= 0x11A9D && codePoint <= 0x11A9D) // SOYOMBO MARK PLUTA .. SOYOMBO MARK PLUTA return true; - if (codePoint >= 0x11ac0 && codePoint <= 0x11af8) + if (codePoint >= 0x11AC0 && codePoint <= 0x11AF8) // PAU CIN HAU LETTER PA .. 
PAU CIN HAU GLOTTAL STOP FINAL return true; - if (codePoint >= 0x11c00 && codePoint <= 0x11c08) + if (codePoint >= 0x11C00 && codePoint <= 0x11C08) // BHAIKSUKI LETTER A .. BHAIKSUKI LETTER VOCALIC L return true; - if (codePoint >= 0x11c0a && codePoint <= 0x11c36) + if (codePoint >= 0x11C0A && codePoint <= 0x11C36) // BHAIKSUKI LETTER E .. BHAIKSUKI VOWEL SIGN VOCALIC L return true; - if (codePoint >= 0x11c38 && codePoint <= 0x11c40) + if (codePoint >= 0x11C38 && codePoint <= 0x11C40) // BHAIKSUKI VOWEL SIGN E .. BHAIKSUKI SIGN AVAGRAHA return true; - if (codePoint >= 0x11c50 && codePoint <= 0x11c59) + if (codePoint >= 0x11C50 && codePoint <= 0x11C59) // BHAIKSUKI DIGIT ZERO .. BHAIKSUKI DIGIT NINE return true; - if (codePoint >= 0x11c72 && codePoint <= 0x11c8f) + if (codePoint >= 0x11C72 && codePoint <= 0x11C8F) // MARCHEN LETTER KA .. MARCHEN LETTER A return true; - if (codePoint >= 0x11c92 && codePoint <= 0x11ca7) + if (codePoint >= 0x11C92 && codePoint <= 0x11CA7) // MARCHEN SUBJOINED LETTER KA .. MARCHEN SUBJOINED LETTER ZA return true; - if (codePoint >= 0x11ca9 && codePoint <= 0x11cb6) + if (codePoint >= 0x11CA9 && codePoint <= 0x11CB6) // MARCHEN SUBJOINED LETTER YA .. MARCHEN SIGN CANDRABINDU return true; - if (codePoint >= 0x11d00 && codePoint <= 0x11d06) + if (codePoint >= 0x11D00 && codePoint <= 0x11D06) // MASARAM GONDI LETTER A .. MASARAM GONDI LETTER E return true; - if (codePoint >= 0x11d08 && codePoint <= 0x11d09) + if (codePoint >= 0x11D08 && codePoint <= 0x11D09) // MASARAM GONDI LETTER AI .. MASARAM GONDI LETTER O return true; - if (codePoint >= 0x11d0b && codePoint <= 0x11d36) + if (codePoint >= 0x11D0B && codePoint <= 0x11D36) // MASARAM GONDI LETTER AU .. MASARAM GONDI VOWEL SIGN VOCALIC R return true; - if (codePoint >= 0x11d3a && codePoint <= 0x11d3a) + if (codePoint >= 0x11D3A && codePoint <= 0x11D3A) // MASARAM GONDI VOWEL SIGN E .. 
MASARAM GONDI VOWEL SIGN E return true; - if (codePoint >= 0x11d3c && codePoint <= 0x11d3d) + if (codePoint >= 0x11D3C && codePoint <= 0x11D3D) // MASARAM GONDI VOWEL SIGN AI .. MASARAM GONDI VOWEL SIGN O return true; - if (codePoint >= 0x11d3f && codePoint <= 0x11d47) + if (codePoint >= 0x11D3F && codePoint <= 0x11D47) // MASARAM GONDI VOWEL SIGN AU .. MASARAM GONDI RA-KARA return true; - if (codePoint >= 0x11d50 && codePoint <= 0x11d59) + if (codePoint >= 0x11D50 && codePoint <= 0x11D59) // MASARAM GONDI DIGIT ZERO .. MASARAM GONDI DIGIT NINE return true; - if (codePoint >= 0x11d60 && codePoint <= 0x11d65) + if (codePoint >= 0x11D60 && codePoint <= 0x11D65) // GUNJALA GONDI LETTER A .. GUNJALA GONDI LETTER UU return true; - if (codePoint >= 0x11d67 && codePoint <= 0x11d68) + if (codePoint >= 0x11D67 && codePoint <= 0x11D68) // GUNJALA GONDI LETTER EE .. GUNJALA GONDI LETTER AI return true; - if (codePoint >= 0x11d6a && codePoint <= 0x11d8e) + if (codePoint >= 0x11D6A && codePoint <= 0x11D8E) // GUNJALA GONDI LETTER OO .. GUNJALA GONDI VOWEL SIGN UU return true; - if (codePoint >= 0x11d90 && codePoint <= 0x11d91) + if (codePoint >= 0x11D90 && codePoint <= 0x11D91) // GUNJALA GONDI VOWEL SIGN EE .. GUNJALA GONDI VOWEL SIGN AI return true; - if (codePoint >= 0x11d93 && codePoint <= 0x11d98) + if (codePoint >= 0x11D93 && codePoint <= 0x11D98) // GUNJALA GONDI VOWEL SIGN OO .. GUNJALA GONDI OM return true; - if (codePoint >= 0x11da0 && codePoint <= 0x11da9) + if (codePoint >= 0x11DA0 && codePoint <= 0x11DA9) // GUNJALA GONDI DIGIT ZERO .. GUNJALA GONDI DIGIT NINE return true; - if (codePoint >= 0x11ee0 && codePoint <= 0x11ef6) + if (codePoint >= 0x11EE0 && codePoint <= 0x11EF6) // MAKASAR LETTER KA .. MAKASAR VOWEL SIGN O return true; - if (codePoint >= 0x12000 && codePoint <= 0x12399) + if (codePoint >= 0x12000 && codePoint <= 0x12399) // CUNEIFORM SIGN A .. 
CUNEIFORM SIGN U U return true; - if (codePoint >= 0x12400 && codePoint <= 0x1246e) + if (codePoint >= 0x12400 && codePoint <= 0x1246E) // CUNEIFORM NUMERIC SIGN TWO ASH .. CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM return true; - if (codePoint >= 0x12480 && codePoint <= 0x12543) + if (codePoint >= 0x12480 && codePoint <= 0x12543) // CUNEIFORM SIGN AB TIMES NUN TENU .. CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU return true; - if (codePoint >= 0x13000 && codePoint <= 0x1342e) + if (codePoint >= 0x13000 && codePoint <= 0x1342E) // EGYPTIAN HIEROGLYPH A001 .. EGYPTIAN HIEROGLYPH AA032 return true; - if (codePoint >= 0x14400 && codePoint <= 0x14646) + if (codePoint >= 0x14400 && codePoint <= 0x14646) // ANATOLIAN HIEROGLYPH A001 .. ANATOLIAN HIEROGLYPH A530 return true; - if (codePoint >= 0x16800 && codePoint <= 0x16a38) + if (codePoint >= 0x16800 && codePoint <= 0x16A38) // BAMUM LETTER PHASE-A NGKUE MFON .. BAMUM LETTER PHASE-F VUEQ return true; - if (codePoint >= 0x16a40 && codePoint <= 0x16a5e) + if (codePoint >= 0x16A40 && codePoint <= 0x16A5E) // MRO LETTER TA .. MRO LETTER TEK return true; - if (codePoint >= 0x16a60 && codePoint <= 0x16a69) + if (codePoint >= 0x16A60 && codePoint <= 0x16A69) // MRO DIGIT ZERO .. MRO DIGIT NINE return true; - if (codePoint >= 0x16ad0 && codePoint <= 0x16aed) + if (codePoint >= 0x16AD0 && codePoint <= 0x16AED) // BASSA VAH LETTER ENNI .. BASSA VAH LETTER I return true; - if (codePoint >= 0x16af0 && codePoint <= 0x16af4) + if (codePoint >= 0x16AF0 && codePoint <= 0x16AF4) // BASSA VAH COMBINING HIGH TONE .. BASSA VAH COMBINING HIGH-LOW TONE return true; - if (codePoint >= 0x16b00 && codePoint <= 0x16b36) + if (codePoint >= 0x16B00 && codePoint <= 0x16B36) // PAHAWH HMONG VOWEL KEEB .. PAHAWH HMONG MARK CIM TAUM return true; - if (codePoint >= 0x16b40 && codePoint <= 0x16b43) + if (codePoint >= 0x16B40 && codePoint <= 0x16B43) // PAHAWH HMONG SIGN VOS SEEV .. 
PAHAWH HMONG SIGN IB YAM return true; - if (codePoint >= 0x16b50 && codePoint <= 0x16b59) + if (codePoint >= 0x16B50 && codePoint <= 0x16B59) // PAHAWH HMONG DIGIT ZERO .. PAHAWH HMONG DIGIT NINE return true; - if (codePoint >= 0x16b63 && codePoint <= 0x16b77) + if (codePoint >= 0x16B63 && codePoint <= 0x16B77) // PAHAWH HMONG SIGN VOS LUB .. PAHAWH HMONG SIGN CIM NRES TOS return true; - if (codePoint >= 0x16b7d && codePoint <= 0x16b8f) + if (codePoint >= 0x16B7D && codePoint <= 0x16B8F) // PAHAWH HMONG CLAN SIGN TSHEEJ .. PAHAWH HMONG CLAN SIGN VWJ return true; - if (codePoint >= 0x16e40 && codePoint <= 0x16e7f) + if (codePoint >= 0x16E40 && codePoint <= 0x16E7F) // MEDEFAIDRIN CAPITAL LETTER M .. MEDEFAIDRIN SMALL LETTER Y return true; - if (codePoint >= 0x16f00 && codePoint <= 0x16f44) + if (codePoint >= 0x16F00 && codePoint <= 0x16F44) // MIAO LETTER PA .. MIAO LETTER HHA return true; - if (codePoint >= 0x16f50 && codePoint <= 0x16f7e) + if (codePoint >= 0x16F50 && codePoint <= 0x16F7E) // MIAO LETTER NASALIZATION .. MIAO VOWEL SIGN NG return true; - if (codePoint >= 0x16f8f && codePoint <= 0x16f9f) + if (codePoint >= 0x16F8F && codePoint <= 0x16F9F) // MIAO TONE RIGHT .. MIAO LETTER REFORMED TONE-8 return true; - if (codePoint >= 0x16fe0 && codePoint <= 0x16fe1) + if (codePoint >= 0x16FE0 && codePoint <= 0x16FE1) // TANGUT ITERATION MARK .. NUSHU ITERATION MARK return true; - if (codePoint >= 0x17000 && codePoint <= 0x187f1) + if (codePoint >= 0x17000 && codePoint <= 0x187F1) // Tangut Ideograph .. Tangut Ideograph return true; - if (codePoint >= 0x18800 && codePoint <= 0x18af2) + if (codePoint >= 0x18800 && codePoint <= 0x18AF2) // TANGUT COMPONENT-001 .. TANGUT COMPONENT-755 return true; - if (codePoint >= 0x1b000 && codePoint <= 0x1b11e) + if (codePoint >= 0x1B000 && codePoint <= 0x1B11E) // KATAKANA LETTER ARCHAIC E .. 
HENTAIGANA LETTER N-MU-MO-2 return true; - if (codePoint >= 0x1b170 && codePoint <= 0x1b2fb) + if (codePoint >= 0x1B170 && codePoint <= 0x1B2FB) // NUSHU CHARACTER-1B170 .. NUSHU CHARACTER-1B2FB return true; - if (codePoint >= 0x1bc00 && codePoint <= 0x1bc6a) + if (codePoint >= 0x1BC00 && codePoint <= 0x1BC6A) // DUPLOYAN LETTER H .. DUPLOYAN LETTER VOCALIC M return true; - if (codePoint >= 0x1bc70 && codePoint <= 0x1bc7c) + if (codePoint >= 0x1BC70 && codePoint <= 0x1BC7C) // DUPLOYAN AFFIX LEFT HORIZONTAL SECANT .. DUPLOYAN AFFIX ATTACHED TANGENT HOOK return true; - if (codePoint >= 0x1bc80 && codePoint <= 0x1bc88) + if (codePoint >= 0x1BC80 && codePoint <= 0x1BC88) // DUPLOYAN AFFIX HIGH ACUTE .. DUPLOYAN AFFIX HIGH VERTICAL return true; - if (codePoint >= 0x1bc90 && codePoint <= 0x1bc99) + if (codePoint >= 0x1BC90 && codePoint <= 0x1BC99) // DUPLOYAN AFFIX LOW ACUTE .. DUPLOYAN AFFIX LOW ARROW return true; - if (codePoint >= 0x1bc9d && codePoint <= 0x1bc9e) + if (codePoint >= 0x1BC9D && codePoint <= 0x1BC9E) // DUPLOYAN THICK LETTER SELECTOR .. DUPLOYAN DOUBLE MARK return true; - if (codePoint >= 0x1d165 && codePoint <= 0x1d169) + if (codePoint >= 0x1D165 && codePoint <= 0x1D169) // MUSICAL SYMBOL COMBINING STEM .. MUSICAL SYMBOL COMBINING TREMOLO-3 return true; - if (codePoint >= 0x1d16d && codePoint <= 0x1d172) + if (codePoint >= 0x1D16D && codePoint <= 0x1D172) // MUSICAL SYMBOL COMBINING AUGMENTATION DOT .. MUSICAL SYMBOL COMBINING FLAG-5 return true; - if (codePoint >= 0x1d17b && codePoint <= 0x1d182) + if (codePoint >= 0x1D17B && codePoint <= 0x1D182) // MUSICAL SYMBOL COMBINING ACCENT .. MUSICAL SYMBOL COMBINING LOURE return true; - if (codePoint >= 0x1d185 && codePoint <= 0x1d18b) + if (codePoint >= 0x1D185 && codePoint <= 0x1D18B) // MUSICAL SYMBOL COMBINING DOIT .. 
MUSICAL SYMBOL COMBINING TRIPLE TONGUE return true; - if (codePoint >= 0x1d1aa && codePoint <= 0x1d1ad) + if (codePoint >= 0x1D1AA && codePoint <= 0x1D1AD) // MUSICAL SYMBOL COMBINING DOWN BOW .. MUSICAL SYMBOL COMBINING SNAP PIZZICATO return true; - if (codePoint >= 0x1d242 && codePoint <= 0x1d244) + if (codePoint >= 0x1D242 && codePoint <= 0x1D244) // COMBINING GREEK MUSICAL TRISEME .. COMBINING GREEK MUSICAL PENTASEME return true; - if (codePoint >= 0x1d400 && codePoint <= 0x1d454) + if (codePoint >= 0x1D400 && codePoint <= 0x1D454) // MATHEMATICAL BOLD CAPITAL A .. MATHEMATICAL ITALIC SMALL G return true; - if (codePoint >= 0x1d456 && codePoint <= 0x1d49c) + if (codePoint >= 0x1D456 && codePoint <= 0x1D49C) // MATHEMATICAL ITALIC SMALL I .. MATHEMATICAL SCRIPT CAPITAL A return true; - if (codePoint >= 0x1d49e && codePoint <= 0x1d49f) + if (codePoint >= 0x1D49E && codePoint <= 0x1D49F) // MATHEMATICAL SCRIPT CAPITAL C .. MATHEMATICAL SCRIPT CAPITAL D return true; - if (codePoint >= 0x1d4a2 && codePoint <= 0x1d4a2) + if (codePoint >= 0x1D4A2 && codePoint <= 0x1D4A2) // MATHEMATICAL SCRIPT CAPITAL G .. MATHEMATICAL SCRIPT CAPITAL G return true; - if (codePoint >= 0x1d4a5 && codePoint <= 0x1d4a6) + if (codePoint >= 0x1D4A5 && codePoint <= 0x1D4A6) // MATHEMATICAL SCRIPT CAPITAL J .. MATHEMATICAL SCRIPT CAPITAL K return true; - if (codePoint >= 0x1d4a9 && codePoint <= 0x1d4ac) + if (codePoint >= 0x1D4A9 && codePoint <= 0x1D4AC) // MATHEMATICAL SCRIPT CAPITAL N .. MATHEMATICAL SCRIPT CAPITAL Q return true; - if (codePoint >= 0x1d4ae && codePoint <= 0x1d4b9) + if (codePoint >= 0x1D4AE && codePoint <= 0x1D4B9) // MATHEMATICAL SCRIPT CAPITAL S .. MATHEMATICAL SCRIPT SMALL D return true; - if (codePoint >= 0x1d4bb && codePoint <= 0x1d4bb) + if (codePoint >= 0x1D4BB && codePoint <= 0x1D4BB) // MATHEMATICAL SCRIPT SMALL F .. 
MATHEMATICAL SCRIPT SMALL F return true; - if (codePoint >= 0x1d4bd && codePoint <= 0x1d4c3) + if (codePoint >= 0x1D4BD && codePoint <= 0x1D4C3) // MATHEMATICAL SCRIPT SMALL H .. MATHEMATICAL SCRIPT SMALL N return true; - if (codePoint >= 0x1d4c5 && codePoint <= 0x1d505) + if (codePoint >= 0x1D4C5 && codePoint <= 0x1D505) // MATHEMATICAL SCRIPT SMALL P .. MATHEMATICAL FRAKTUR CAPITAL B return true; - if (codePoint >= 0x1d507 && codePoint <= 0x1d50a) + if (codePoint >= 0x1D507 && codePoint <= 0x1D50A) // MATHEMATICAL FRAKTUR CAPITAL D .. MATHEMATICAL FRAKTUR CAPITAL G return true; - if (codePoint >= 0x1d50d && codePoint <= 0x1d514) + if (codePoint >= 0x1D50D && codePoint <= 0x1D514) // MATHEMATICAL FRAKTUR CAPITAL J .. MATHEMATICAL FRAKTUR CAPITAL Q return true; - if (codePoint >= 0x1d516 && codePoint <= 0x1d51c) + if (codePoint >= 0x1D516 && codePoint <= 0x1D51C) // MATHEMATICAL FRAKTUR CAPITAL S .. MATHEMATICAL FRAKTUR CAPITAL Y return true; - if (codePoint >= 0x1d51e && codePoint <= 0x1d539) + if (codePoint >= 0x1D51E && codePoint <= 0x1D539) // MATHEMATICAL FRAKTUR SMALL A .. MATHEMATICAL DOUBLE-STRUCK CAPITAL B return true; - if (codePoint >= 0x1d53b && codePoint <= 0x1d53e) + if (codePoint >= 0x1D53B && codePoint <= 0x1D53E) // MATHEMATICAL DOUBLE-STRUCK CAPITAL D .. MATHEMATICAL DOUBLE-STRUCK CAPITAL G return true; - if (codePoint >= 0x1d540 && codePoint <= 0x1d544) + if (codePoint >= 0x1D540 && codePoint <= 0x1D544) // MATHEMATICAL DOUBLE-STRUCK CAPITAL I .. MATHEMATICAL DOUBLE-STRUCK CAPITAL M return true; - if (codePoint >= 0x1d546 && codePoint <= 0x1d546) + if (codePoint >= 0x1D546 && codePoint <= 0x1D546) // MATHEMATICAL DOUBLE-STRUCK CAPITAL O .. MATHEMATICAL DOUBLE-STRUCK CAPITAL O return true; - if (codePoint >= 0x1d54a && codePoint <= 0x1d550) + if (codePoint >= 0x1D54A && codePoint <= 0x1D550) // MATHEMATICAL DOUBLE-STRUCK CAPITAL S .. 
MATHEMATICAL DOUBLE-STRUCK CAPITAL Y return true; - if (codePoint >= 0x1d552 && codePoint <= 0x1d6a5) + if (codePoint >= 0x1D552 && codePoint <= 0x1D6A5) // MATHEMATICAL DOUBLE-STRUCK SMALL A .. MATHEMATICAL ITALIC SMALL DOTLESS J return true; - if (codePoint >= 0x1d6a8 && codePoint <= 0x1d6c0) + if (codePoint >= 0x1D6A8 && codePoint <= 0x1D6C0) // MATHEMATICAL BOLD CAPITAL ALPHA .. MATHEMATICAL BOLD CAPITAL OMEGA return true; - if (codePoint >= 0x1d6c2 && codePoint <= 0x1d6da) + if (codePoint >= 0x1D6C2 && codePoint <= 0x1D6DA) // MATHEMATICAL BOLD SMALL ALPHA .. MATHEMATICAL BOLD SMALL OMEGA return true; - if (codePoint >= 0x1d6dc && codePoint <= 0x1d6fa) + if (codePoint >= 0x1D6DC && codePoint <= 0x1D6FA) // MATHEMATICAL BOLD EPSILON SYMBOL .. MATHEMATICAL ITALIC CAPITAL OMEGA return true; - if (codePoint >= 0x1d6fc && codePoint <= 0x1d714) + if (codePoint >= 0x1D6FC && codePoint <= 0x1D714) // MATHEMATICAL ITALIC SMALL ALPHA .. MATHEMATICAL ITALIC SMALL OMEGA return true; - if (codePoint >= 0x1d716 && codePoint <= 0x1d734) + if (codePoint >= 0x1D716 && codePoint <= 0x1D734) // MATHEMATICAL ITALIC EPSILON SYMBOL .. MATHEMATICAL BOLD ITALIC CAPITAL OMEGA return true; - if (codePoint >= 0x1d736 && codePoint <= 0x1d74e) + if (codePoint >= 0x1D736 && codePoint <= 0x1D74E) // MATHEMATICAL BOLD ITALIC SMALL ALPHA .. MATHEMATICAL BOLD ITALIC SMALL OMEGA return true; - if (codePoint >= 0x1d750 && codePoint <= 0x1d76e) + if (codePoint >= 0x1D750 && codePoint <= 0x1D76E) // MATHEMATICAL BOLD ITALIC EPSILON SYMBOL .. MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA return true; - if (codePoint >= 0x1d770 && codePoint <= 0x1d788) + if (codePoint >= 0x1D770 && codePoint <= 0x1D788) // MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA .. MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA return true; - if (codePoint >= 0x1d78a && codePoint <= 0x1d7a8) + if (codePoint >= 0x1D78A && codePoint <= 0x1D7A8) // MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL .. 
MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA return true; - if (codePoint >= 0x1d7aa && codePoint <= 0x1d7c2) + if (codePoint >= 0x1D7AA && codePoint <= 0x1D7C2) // MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA .. MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA return true; - if (codePoint >= 0x1d7c4 && codePoint <= 0x1d7cb) + if (codePoint >= 0x1D7C4 && codePoint <= 0x1D7CB) // MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL .. MATHEMATICAL BOLD SMALL DIGAMMA return true; - if (codePoint >= 0x1d7ce && codePoint <= 0x1d7ff) + if (codePoint >= 0x1D7CE && codePoint <= 0x1D7FF) // MATHEMATICAL BOLD DIGIT ZERO .. MATHEMATICAL MONOSPACE DIGIT NINE return true; - if (codePoint >= 0x1da00 && codePoint <= 0x1da36) + if (codePoint >= 0x1DA00 && codePoint <= 0x1DA36) // SIGNWRITING HEAD RIM .. SIGNWRITING AIR SUCKING IN return true; - if (codePoint >= 0x1da3b && codePoint <= 0x1da6c) + if (codePoint >= 0x1DA3B && codePoint <= 0x1DA6C) // SIGNWRITING MOUTH CLOSED NEUTRAL .. SIGNWRITING EXCITEMENT return true; - if (codePoint >= 0x1da75 && codePoint <= 0x1da75) + if (codePoint >= 0x1DA75 && codePoint <= 0x1DA75) // SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS .. SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS return true; - if (codePoint >= 0x1da84 && codePoint <= 0x1da84) + if (codePoint >= 0x1DA84 && codePoint <= 0x1DA84) // SIGNWRITING LOCATION HEAD NECK .. SIGNWRITING LOCATION HEAD NECK return true; - if (codePoint >= 0x1da9b && codePoint <= 0x1da9f) + if (codePoint >= 0x1DA9B && codePoint <= 0x1DA9F) // SIGNWRITING FILL MODIFIER-2 .. SIGNWRITING FILL MODIFIER-6 return true; - if (codePoint >= 0x1daa1 && codePoint <= 0x1daaf) + if (codePoint >= 0x1DAA1 && codePoint <= 0x1DAAF) // SIGNWRITING ROTATION MODIFIER-2 .. SIGNWRITING ROTATION MODIFIER-16 return true; - if (codePoint >= 0x1e000 && codePoint <= 0x1e006) + if (codePoint >= 0x1E000 && codePoint <= 0x1E006) // COMBINING GLAGOLITIC LETTER AZU .. 
COMBINING GLAGOLITIC LETTER ZHIVETE return true; - if (codePoint >= 0x1e008 && codePoint <= 0x1e018) + if (codePoint >= 0x1E008 && codePoint <= 0x1E018) // COMBINING GLAGOLITIC LETTER ZEMLJA .. COMBINING GLAGOLITIC LETTER HERU return true; - if (codePoint >= 0x1e01b && codePoint <= 0x1e021) + if (codePoint >= 0x1E01B && codePoint <= 0x1E021) // COMBINING GLAGOLITIC LETTER SHTA .. COMBINING GLAGOLITIC LETTER YATI return true; - if (codePoint >= 0x1e023 && codePoint <= 0x1e024) + if (codePoint >= 0x1E023 && codePoint <= 0x1E024) // COMBINING GLAGOLITIC LETTER YU .. COMBINING GLAGOLITIC LETTER SMALL YUS return true; - if (codePoint >= 0x1e026 && codePoint <= 0x1e02a) + if (codePoint >= 0x1E026 && codePoint <= 0x1E02A) // COMBINING GLAGOLITIC LETTER YO .. COMBINING GLAGOLITIC LETTER FITA return true; - if (codePoint >= 0x1e800 && codePoint <= 0x1e8c4) + if (codePoint >= 0x1E800 && codePoint <= 0x1E8C4) // MENDE KIKAKUI SYLLABLE M001 KI .. MENDE KIKAKUI SYLLABLE M060 NYON return true; - if (codePoint >= 0x1e8d0 && codePoint <= 0x1e8d6) + if (codePoint >= 0x1E8D0 && codePoint <= 0x1E8D6) // MENDE KIKAKUI COMBINING NUMBER TEENS .. MENDE KIKAKUI COMBINING NUMBER MILLIONS return true; - if (codePoint >= 0x1e900 && codePoint <= 0x1e94a) + if (codePoint >= 0x1E900 && codePoint <= 0x1E94A) // ADLAM CAPITAL LETTER ALIF .. ADLAM NUKTA return true; - if (codePoint >= 0x1e950 && codePoint <= 0x1e959) + if (codePoint >= 0x1E950 && codePoint <= 0x1E959) // ADLAM DIGIT ZERO .. ADLAM DIGIT NINE return true; - if (codePoint >= 0x1ee00 && codePoint <= 0x1ee03) + if (codePoint >= 0x1EE00 && codePoint <= 0x1EE03) // ARABIC MATHEMATICAL ALEF .. ARABIC MATHEMATICAL DAL return true; - if (codePoint >= 0x1ee05 && codePoint <= 0x1ee1f) + if (codePoint >= 0x1EE05 && codePoint <= 0x1EE1F) // ARABIC MATHEMATICAL WAW .. 
ARABIC MATHEMATICAL DOTLESS QAF return true; - if (codePoint >= 0x1ee21 && codePoint <= 0x1ee22) + if (codePoint >= 0x1EE21 && codePoint <= 0x1EE22) // ARABIC MATHEMATICAL INITIAL BEH .. ARABIC MATHEMATICAL INITIAL JEEM return true; - if (codePoint >= 0x1ee24 && codePoint <= 0x1ee24) + if (codePoint >= 0x1EE24 && codePoint <= 0x1EE24) // ARABIC MATHEMATICAL INITIAL HEH .. ARABIC MATHEMATICAL INITIAL HEH return true; - if (codePoint >= 0x1ee27 && codePoint <= 0x1ee27) + if (codePoint >= 0x1EE27 && codePoint <= 0x1EE27) // ARABIC MATHEMATICAL INITIAL HAH .. ARABIC MATHEMATICAL INITIAL HAH return true; - if (codePoint >= 0x1ee29 && codePoint <= 0x1ee32) + if (codePoint >= 0x1EE29 && codePoint <= 0x1EE32) // ARABIC MATHEMATICAL INITIAL YEH .. ARABIC MATHEMATICAL INITIAL QAF return true; - if (codePoint >= 0x1ee34 && codePoint <= 0x1ee37) + if (codePoint >= 0x1EE34 && codePoint <= 0x1EE37) // ARABIC MATHEMATICAL INITIAL SHEEN .. ARABIC MATHEMATICAL INITIAL KHAH return true; - if (codePoint >= 0x1ee39 && codePoint <= 0x1ee39) + if (codePoint >= 0x1EE39 && codePoint <= 0x1EE39) // ARABIC MATHEMATICAL INITIAL DAD .. ARABIC MATHEMATICAL INITIAL DAD return true; - if (codePoint >= 0x1ee3b && codePoint <= 0x1ee3b) + if (codePoint >= 0x1EE3B && codePoint <= 0x1EE3B) // ARABIC MATHEMATICAL INITIAL GHAIN .. ARABIC MATHEMATICAL INITIAL GHAIN return true; - if (codePoint >= 0x1ee42 && codePoint <= 0x1ee42) + if (codePoint >= 0x1EE42 && codePoint <= 0x1EE42) // ARABIC MATHEMATICAL TAILED JEEM .. ARABIC MATHEMATICAL TAILED JEEM return true; - if (codePoint >= 0x1ee47 && codePoint <= 0x1ee47) + if (codePoint >= 0x1EE47 && codePoint <= 0x1EE47) // ARABIC MATHEMATICAL TAILED HAH .. ARABIC MATHEMATICAL TAILED HAH return true; - if (codePoint >= 0x1ee49 && codePoint <= 0x1ee49) + if (codePoint >= 0x1EE49 && codePoint <= 0x1EE49) // ARABIC MATHEMATICAL TAILED YEH .. 
ARABIC MATHEMATICAL TAILED YEH return true; - if (codePoint >= 0x1ee4b && codePoint <= 0x1ee4b) + if (codePoint >= 0x1EE4B && codePoint <= 0x1EE4B) // ARABIC MATHEMATICAL TAILED LAM .. ARABIC MATHEMATICAL TAILED LAM return true; - if (codePoint >= 0x1ee4d && codePoint <= 0x1ee4f) + if (codePoint >= 0x1EE4D && codePoint <= 0x1EE4F) // ARABIC MATHEMATICAL TAILED NOON .. ARABIC MATHEMATICAL TAILED AIN return true; - if (codePoint >= 0x1ee51 && codePoint <= 0x1ee52) + if (codePoint >= 0x1EE51 && codePoint <= 0x1EE52) // ARABIC MATHEMATICAL TAILED SAD .. ARABIC MATHEMATICAL TAILED QAF return true; - if (codePoint >= 0x1ee54 && codePoint <= 0x1ee54) + if (codePoint >= 0x1EE54 && codePoint <= 0x1EE54) // ARABIC MATHEMATICAL TAILED SHEEN .. ARABIC MATHEMATICAL TAILED SHEEN return true; - if (codePoint >= 0x1ee57 && codePoint <= 0x1ee57) + if (codePoint >= 0x1EE57 && codePoint <= 0x1EE57) // ARABIC MATHEMATICAL TAILED KHAH .. ARABIC MATHEMATICAL TAILED KHAH return true; - if (codePoint >= 0x1ee59 && codePoint <= 0x1ee59) + if (codePoint >= 0x1EE59 && codePoint <= 0x1EE59) // ARABIC MATHEMATICAL TAILED DAD .. ARABIC MATHEMATICAL TAILED DAD return true; - if (codePoint >= 0x1ee5b && codePoint <= 0x1ee5b) + if (codePoint >= 0x1EE5B && codePoint <= 0x1EE5B) // ARABIC MATHEMATICAL TAILED GHAIN .. ARABIC MATHEMATICAL TAILED GHAIN return true; - if (codePoint >= 0x1ee5d && codePoint <= 0x1ee5d) + if (codePoint >= 0x1EE5D && codePoint <= 0x1EE5D) // ARABIC MATHEMATICAL TAILED DOTLESS NOON .. ARABIC MATHEMATICAL TAILED DOTLESS NOON return true; - if (codePoint >= 0x1ee5f && codePoint <= 0x1ee5f) + if (codePoint >= 0x1EE5F && codePoint <= 0x1EE5F) // ARABIC MATHEMATICAL TAILED DOTLESS QAF .. ARABIC MATHEMATICAL TAILED DOTLESS QAF return true; - if (codePoint >= 0x1ee61 && codePoint <= 0x1ee62) + if (codePoint >= 0x1EE61 && codePoint <= 0x1EE62) // ARABIC MATHEMATICAL STRETCHED BEH .. 
ARABIC MATHEMATICAL STRETCHED JEEM return true; - if (codePoint >= 0x1ee64 && codePoint <= 0x1ee64) + if (codePoint >= 0x1EE64 && codePoint <= 0x1EE64) // ARABIC MATHEMATICAL STRETCHED HEH .. ARABIC MATHEMATICAL STRETCHED HEH return true; - if (codePoint >= 0x1ee67 && codePoint <= 0x1ee6a) + if (codePoint >= 0x1EE67 && codePoint <= 0x1EE6A) // ARABIC MATHEMATICAL STRETCHED HAH .. ARABIC MATHEMATICAL STRETCHED KAF return true; - if (codePoint >= 0x1ee6c && codePoint <= 0x1ee72) + if (codePoint >= 0x1EE6C && codePoint <= 0x1EE72) // ARABIC MATHEMATICAL STRETCHED MEEM .. ARABIC MATHEMATICAL STRETCHED QAF return true; - if (codePoint >= 0x1ee74 && codePoint <= 0x1ee77) + if (codePoint >= 0x1EE74 && codePoint <= 0x1EE77) // ARABIC MATHEMATICAL STRETCHED SHEEN .. ARABIC MATHEMATICAL STRETCHED KHAH return true; - if (codePoint >= 0x1ee79 && codePoint <= 0x1ee7c) + if (codePoint >= 0x1EE79 && codePoint <= 0x1EE7C) // ARABIC MATHEMATICAL STRETCHED DAD .. ARABIC MATHEMATICAL STRETCHED DOTLESS BEH return true; - if (codePoint >= 0x1ee7e && codePoint <= 0x1ee7e) + if (codePoint >= 0x1EE7E && codePoint <= 0x1EE7E) // ARABIC MATHEMATICAL STRETCHED DOTLESS FEH .. ARABIC MATHEMATICAL STRETCHED DOTLESS FEH return true; - if (codePoint >= 0x1ee80 && codePoint <= 0x1ee89) + if (codePoint >= 0x1EE80 && codePoint <= 0x1EE89) // ARABIC MATHEMATICAL LOOPED ALEF .. ARABIC MATHEMATICAL LOOPED YEH return true; - if (codePoint >= 0x1ee8b && codePoint <= 0x1ee9b) + if (codePoint >= 0x1EE8B && codePoint <= 0x1EE9B) // ARABIC MATHEMATICAL LOOPED LAM .. ARABIC MATHEMATICAL LOOPED GHAIN return true; - if (codePoint >= 0x1eea1 && codePoint <= 0x1eea3) + if (codePoint >= 0x1EEA1 && codePoint <= 0x1EEA3) // ARABIC MATHEMATICAL DOUBLE-STRUCK BEH .. ARABIC MATHEMATICAL DOUBLE-STRUCK DAL return true; - if (codePoint >= 0x1eea5 && codePoint <= 0x1eea9) + if (codePoint >= 0x1EEA5 && codePoint <= 0x1EEA9) // ARABIC MATHEMATICAL DOUBLE-STRUCK WAW .. 
ARABIC MATHEMATICAL DOUBLE-STRUCK YEH return true; - if (codePoint >= 0x1eeab && codePoint <= 0x1eebb) + if (codePoint >= 0x1EEAB && codePoint <= 0x1EEBB) // ARABIC MATHEMATICAL DOUBLE-STRUCK LAM .. ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN return true; - if (codePoint >= 0x20000 && codePoint <= 0x2a6d6) + if (codePoint >= 0x20000 && codePoint <= 0x2A6D6) // CJK Ideograph Extension B .. CJK Ideograph Extension B return true; - if (codePoint >= 0x2a700 && codePoint <= 0x2b734) + if (codePoint >= 0x2A700 && codePoint <= 0x2B734) // CJK Ideograph Extension C .. CJK Ideograph Extension C return true; - if (codePoint >= 0x2b740 && codePoint <= 0x2b81d) + if (codePoint >= 0x2B740 && codePoint <= 0x2B81D) // CJK Ideograph Extension D .. CJK Ideograph Extension D return true; - if (codePoint >= 0x2b820 && codePoint <= 0x2cea1) + if (codePoint >= 0x2B820 && codePoint <= 0x2CEA1) // CJK Ideograph Extension E .. CJK Ideograph Extension E return true; - if (codePoint >= 0x2ceb0 && codePoint <= 0x2ebe0) + if (codePoint >= 0x2CEB0 && codePoint <= 0x2EBE0) // CJK Ideograph Extension F .. CJK Ideograph Extension F return true; - if (codePoint >= 0x2f800 && codePoint <= 0x2fa1d) + if (codePoint >= 0x2F800 && codePoint <= 0x2FA1D) // CJK COMPATIBILITY IDEOGRAPH-2F800 .. CJK COMPATIBILITY IDEOGRAPH-2FA1D return true; - if (codePoint >= 0xe0100 && codePoint <= 0xe01ef) + if (codePoint >= 0xE0100 && codePoint <= 0xE01EF) // VARIATION SELECTOR-17 .. 
VARIATION SELECTOR-256 return true; return false; } + +bool +js::unicode::CanUpperCaseSpecialCasing(char16_t ch) +{ + if (ch < 0x00DF || ch > 0xFB17) + return false; + if (ch <= 0x0587) { + // U+00DF LATIN SMALL LETTER SHARP S + // U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE (LATIN SMALL LETTER APOSTROPHE N) + // U+01F0 LATIN SMALL LETTER J WITH CARON (LATIN SMALL LETTER J HACEK) + // U+0390 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS (GREEK SMALL LETTER IOTA DIAERESIS TONOS) + // U+03B0 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS (GREEK SMALL LETTER UPSILON DIAERESIS TONOS) + // U+0587 ARMENIAN SMALL LIGATURE ECH YIWN + return ch == 0x00DF || ch == 0x0149 || ch == 0x01F0 || ch == 0x0390 || ch == 0x03B0 || + ch == 0x0587; + } + if (ch <= 0x1FFC) { + // U+1E96 LATIN SMALL LETTER H WITH LINE BELOW .. U+1E9A LATIN SMALL LETTER A WITH RIGHT HALF RING + if (ch <= 0x1E9A) + return ch >= 0x1E96; + if (ch < 0x1F50) + return false; + // U+1F50 GREEK SMALL LETTER UPSILON WITH PSILI + // U+1F52 GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA + // U+1F54 GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA + // U+1F56 GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI + // U+1F80 GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI .. U+1FAF GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + // U+1FB2 GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI .. U+1FB4 GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI + // U+1FB6 GREEK SMALL LETTER ALPHA WITH PERISPOMENI .. U+1FB7 GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI + // U+1FBC GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + // U+1FC2 GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI .. U+1FC4 GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI + // U+1FC6 GREEK SMALL LETTER ETA WITH PERISPOMENI .. 
U+1FC7 GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI + // U+1FCC GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + // U+1FD2 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA .. U+1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA + // U+1FD6 GREEK SMALL LETTER IOTA WITH PERISPOMENI .. U+1FD7 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI + // U+1FE2 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA .. U+1FE4 GREEK SMALL LETTER RHO WITH PSILI + // U+1FE6 GREEK SMALL LETTER UPSILON WITH PERISPOMENI .. U+1FE7 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI + // U+1FF2 GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI .. U+1FF4 GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI + // U+1FF6 GREEK SMALL LETTER OMEGA WITH PERISPOMENI .. U+1FF7 GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI + // U+1FFC GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + return ch == 0x1F50 || ch == 0x1F52 || ch == 0x1F54 || ch == 0x1F56 || + (ch >= 0x1F80 && ch <= 0x1FAF) || (ch >= 0x1FB2 && ch <= 0x1FB4) || + (ch >= 0x1FB6 && ch <= 0x1FB7) || ch == 0x1FBC || (ch >= 0x1FC2 && ch <= 0x1FC4) || + (ch >= 0x1FC6 && ch <= 0x1FC7) || ch == 0x1FCC || (ch >= 0x1FD2 && ch <= 0x1FD3) || + (ch >= 0x1FD6 && ch <= 0x1FD7) || (ch >= 0x1FE2 && ch <= 0x1FE4) || + (ch >= 0x1FE6 && ch <= 0x1FE7) || (ch >= 0x1FF2 && ch <= 0x1FF4) || + (ch >= 0x1FF6 && ch <= 0x1FF7) || ch == 0x1FFC; + } + if (ch < 0xFB00) + return false; + // U+FB00 LATIN SMALL LIGATURE FF .. U+FB06 LATIN SMALL LIGATURE ST + // U+FB13 ARMENIAN SMALL LIGATURE MEN NOW .. 
U+FB17 ARMENIAN SMALL LIGATURE MEN XEH + return (ch >= 0xFB00 && ch <= 0xFB06) || (ch >= 0xFB13 && ch <= 0xFB17); +} + +size_t +js::unicode::LengthUpperCaseSpecialCasing(char16_t ch) +{ + switch(ch) { + case 0x00DF: return 2; // LATIN SMALL LETTER SHARP S + case 0x0149: return 2; // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE (LATIN SMALL LETTER APOSTROPHE N) + case 0x01F0: return 2; // LATIN SMALL LETTER J WITH CARON (LATIN SMALL LETTER J HACEK) + case 0x0390: return 3; // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS (GREEK SMALL LETTER IOTA DIAERESIS TONOS) + case 0x03B0: return 3; // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS (GREEK SMALL LETTER UPSILON DIAERESIS TONOS) + case 0x0587: return 2; // ARMENIAN SMALL LIGATURE ECH YIWN + case 0x1E96: return 2; // LATIN SMALL LETTER H WITH LINE BELOW + case 0x1E97: return 2; // LATIN SMALL LETTER T WITH DIAERESIS + case 0x1E98: return 2; // LATIN SMALL LETTER W WITH RING ABOVE + case 0x1E99: return 2; // LATIN SMALL LETTER Y WITH RING ABOVE + case 0x1E9A: return 2; // LATIN SMALL LETTER A WITH RIGHT HALF RING + case 0x1F50: return 2; // GREEK SMALL LETTER UPSILON WITH PSILI + case 0x1F52: return 3; // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA + case 0x1F54: return 3; // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA + case 0x1F56: return 3; // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI + case 0x1F80: return 2; // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI + case 0x1F81: return 2; // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI + case 0x1F82: return 2; // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1F83: return 2; // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1F84: return 2; // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1F85: return 2; // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1F86: return 2; // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND 
YPOGEGRAMMENI + case 0x1F87: return 2; // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F88: return 2; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + case 0x1F89: return 2; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + case 0x1F8A: return 2; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1F8B: return 2; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1F8C: return 2; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1F8D: return 2; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1F8E: return 2; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F8F: return 2; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F90: return 2; // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI + case 0x1F91: return 2; // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI + case 0x1F92: return 2; // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1F93: return 2; // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1F94: return 2; // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1F95: return 2; // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1F96: return 2; // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F97: return 2; // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F98: return 2; // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + case 0x1F99: return 2; // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + case 0x1F9A: return 2; // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1F9B: return 2; // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1F9C: return 2; // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA 
AND PROSGEGRAMMENI + case 0x1F9D: return 2; // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1F9E: return 2; // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F9F: return 2; // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FA0: return 2; // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI + case 0x1FA1: return 2; // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI + case 0x1FA2: return 2; // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1FA3: return 2; // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1FA4: return 2; // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1FA5: return 2; // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1FA6: return 2; // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1FA7: return 2; // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1FA8: return 2; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + case 0x1FA9: return 2; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + case 0x1FAA: return 2; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1FAB: return 2; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1FAC: return 2; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1FAD: return 2; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1FAE: return 2; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FAF: return 2; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FB2: return 2; // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI + case 0x1FB3: return 2; // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI + case 0x1FB4: return 2; // GREEK SMALL LETTER ALPHA WITH 
OXIA AND YPOGEGRAMMENI + case 0x1FB6: return 2; // GREEK SMALL LETTER ALPHA WITH PERISPOMENI + case 0x1FB7: return 3; // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1FBC: return 2; // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + case 0x1FC2: return 2; // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI + case 0x1FC3: return 2; // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI + case 0x1FC4: return 2; // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI + case 0x1FC6: return 2; // GREEK SMALL LETTER ETA WITH PERISPOMENI + case 0x1FC7: return 3; // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1FCC: return 2; // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + case 0x1FD2: return 3; // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA + case 0x1FD3: return 3; // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA + case 0x1FD6: return 2; // GREEK SMALL LETTER IOTA WITH PERISPOMENI + case 0x1FD7: return 3; // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI + case 0x1FE2: return 3; // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA + case 0x1FE3: return 3; // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA + case 0x1FE4: return 2; // GREEK SMALL LETTER RHO WITH PSILI + case 0x1FE6: return 2; // GREEK SMALL LETTER UPSILON WITH PERISPOMENI + case 0x1FE7: return 3; // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI + case 0x1FF2: return 2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI + case 0x1FF3: return 2; // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI + case 0x1FF4: return 2; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI + case 0x1FF6: return 2; // GREEK SMALL LETTER OMEGA WITH PERISPOMENI + case 0x1FF7: return 3; // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1FFC: return 2; // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + case 0xFB00: return 2; // LATIN SMALL LIGATURE FF + case 0xFB01: return 2; // LATIN SMALL LIGATURE FI + case 0xFB02: return 2; // 
LATIN SMALL LIGATURE FL + case 0xFB03: return 3; // LATIN SMALL LIGATURE FFI + case 0xFB04: return 3; // LATIN SMALL LIGATURE FFL + case 0xFB05: return 2; // LATIN SMALL LIGATURE LONG S T + case 0xFB06: return 2; // LATIN SMALL LIGATURE ST + case 0xFB13: return 2; // ARMENIAN SMALL LIGATURE MEN NOW + case 0xFB14: return 2; // ARMENIAN SMALL LIGATURE MEN ECH + case 0xFB15: return 2; // ARMENIAN SMALL LIGATURE MEN INI + case 0xFB16: return 2; // ARMENIAN SMALL LIGATURE VEW NOW + case 0xFB17: return 2; // ARMENIAN SMALL LIGATURE MEN XEH + } + + MOZ_ASSERT_UNREACHABLE("Bad character input."); + return 0; +} + +void +js::unicode::AppendUpperCaseSpecialCasing(char16_t ch, char16_t* elements, size_t* index) +{ + switch(ch) { + case 0x00DF: // LATIN SMALL LETTER SHARP S + elements[(*index)++] = 0x0053; // LATIN CAPITAL LETTER S + elements[(*index)++] = 0x0053; // LATIN CAPITAL LETTER S + return; + case 0x0149: // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE (LATIN SMALL LETTER APOSTROPHE N) + elements[(*index)++] = 0x02BC; // MODIFIER LETTER APOSTROPHE + elements[(*index)++] = 0x004E; // LATIN CAPITAL LETTER N + return; + case 0x01F0: // LATIN SMALL LETTER J WITH CARON (LATIN SMALL LETTER J HACEK) + elements[(*index)++] = 0x004A; // LATIN CAPITAL LETTER J + elements[(*index)++] = 0x030C; // COMBINING CARON (NON-SPACING HACEK) + return; + case 0x0390: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS (GREEK SMALL LETTER IOTA DIAERESIS TONOS) + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS) + elements[(*index)++] = 0x0301; // COMBINING ACUTE ACCENT (NON-SPACING ACUTE) + return; + case 0x03B0: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS (GREEK SMALL LETTER UPSILON DIAERESIS TONOS) + elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON + elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS) + elements[(*index)++] = 0x0301; // COMBINING 
ACUTE ACCENT (NON-SPACING ACUTE) + return; + case 0x0587: // ARMENIAN SMALL LIGATURE ECH YIWN + elements[(*index)++] = 0x0535; // ARMENIAN CAPITAL LETTER ECH + elements[(*index)++] = 0x0552; // ARMENIAN CAPITAL LETTER YIWN + return; + case 0x1E96: // LATIN SMALL LETTER H WITH LINE BELOW + elements[(*index)++] = 0x0048; // LATIN CAPITAL LETTER H + elements[(*index)++] = 0x0331; // COMBINING MACRON BELOW (NON-SPACING MACRON BELOW) + return; + case 0x1E97: // LATIN SMALL LETTER T WITH DIAERESIS + elements[(*index)++] = 0x0054; // LATIN CAPITAL LETTER T + elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS) + return; + case 0x1E98: // LATIN SMALL LETTER W WITH RING ABOVE + elements[(*index)++] = 0x0057; // LATIN CAPITAL LETTER W + elements[(*index)++] = 0x030A; // COMBINING RING ABOVE (NON-SPACING RING ABOVE) + return; + case 0x1E99: // LATIN SMALL LETTER Y WITH RING ABOVE + elements[(*index)++] = 0x0059; // LATIN CAPITAL LETTER Y + elements[(*index)++] = 0x030A; // COMBINING RING ABOVE (NON-SPACING RING ABOVE) + return; + case 0x1E9A: // LATIN SMALL LETTER A WITH RIGHT HALF RING + elements[(*index)++] = 0x0041; // LATIN CAPITAL LETTER A + elements[(*index)++] = 0x02BE; // MODIFIER LETTER RIGHT HALF RING + return; + case 0x1F50: // GREEK SMALL LETTER UPSILON WITH PSILI + elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON + elements[(*index)++] = 0x0313; // COMBINING COMMA ABOVE (NON-SPACING COMMA ABOVE) + return; + case 0x1F52: // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA + elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON + elements[(*index)++] = 0x0313; // COMBINING COMMA ABOVE (NON-SPACING COMMA ABOVE) + elements[(*index)++] = 0x0300; // COMBINING GRAVE ACCENT (NON-SPACING GRAVE) + return; + case 0x1F54: // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA + elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON + elements[(*index)++] = 0x0313; // COMBINING COMMA ABOVE (NON-SPACING COMMA ABOVE) + 
elements[(*index)++] = 0x0301; // COMBINING ACUTE ACCENT (NON-SPACING ACUTE) + return; + case 0x1F56: // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI + elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON + elements[(*index)++] = 0x0313; // COMBINING COMMA ABOVE (NON-SPACING COMMA ABOVE) + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + return; + case 0x1F80: // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F08; // GREEK CAPITAL LETTER ALPHA WITH PSILI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F81: // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F09; // GREEK CAPITAL LETTER ALPHA WITH DASIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F82: // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F0A; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F83: // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F0B; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F84: // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F0C; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F85: // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F0D; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F86: // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F0E; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL 
LETTER IOTA + return; + case 0x1F87: // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F0F; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F88: // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F08; // GREEK CAPITAL LETTER ALPHA WITH PSILI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F89: // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F09; // GREEK CAPITAL LETTER ALPHA WITH DASIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F8A: // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F0A; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F8B: // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F0B; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F8C: // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F0C; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F8D: // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F0D; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F8E: // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F0E; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F8F: // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI 
AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F0F; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F90: // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F28; // GREEK CAPITAL LETTER ETA WITH PSILI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F91: // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F29; // GREEK CAPITAL LETTER ETA WITH DASIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F92: // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F2A; // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F93: // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F2B; // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F94: // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F2C; // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F95: // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F2D; // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F96: // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F2E; // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F97: // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F2F; // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI + elements[(*index)++] = 0x0399; // 
GREEK CAPITAL LETTER IOTA + return; + case 0x1F98: // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F28; // GREEK CAPITAL LETTER ETA WITH PSILI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F99: // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F29; // GREEK CAPITAL LETTER ETA WITH DASIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F9A: // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F2A; // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F9B: // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F2B; // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F9C: // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F2C; // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F9D: // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F2D; // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F9E: // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F2E; // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1F9F: // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F2F; // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FA0: // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI + 
elements[(*index)++] = 0x1F68; // GREEK CAPITAL LETTER OMEGA WITH PSILI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FA1: // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F69; // GREEK CAPITAL LETTER OMEGA WITH DASIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FA2: // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F6A; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FA3: // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F6B; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FA4: // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F6C; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FA5: // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F6D; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FA6: // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F6E; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FA7: // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + elements[(*index)++] = 0x1F6F; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FA8: // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F68; // GREEK CAPITAL LETTER OMEGA WITH PSILI + elements[(*index)++] = 0x0399; // 
GREEK CAPITAL LETTER IOTA + return; + case 0x1FA9: // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F69; // GREEK CAPITAL LETTER OMEGA WITH DASIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FAA: // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F6A; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FAB: // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F6B; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FAC: // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F6C; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FAD: // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F6D; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FAE: // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F6E; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FAF: // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + elements[(*index)++] = 0x1F6F; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FB2: // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1FBA; // GREEK CAPITAL LETTER ALPHA WITH VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FB3: // GREEK SMALL LETTER ALPHA WITH 
YPOGEGRAMMENI + elements[(*index)++] = 0x0391; // GREEK CAPITAL LETTER ALPHA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FB4: // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x0386; // GREEK CAPITAL LETTER ALPHA WITH TONOS (GREEK CAPITAL LETTER ALPHA TONOS) + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FB6: // GREEK SMALL LETTER ALPHA WITH PERISPOMENI + elements[(*index)++] = 0x0391; // GREEK CAPITAL LETTER ALPHA + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + return; + case 0x1FB7: // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI + elements[(*index)++] = 0x0391; // GREEK CAPITAL LETTER ALPHA + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FBC: // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + elements[(*index)++] = 0x0391; // GREEK CAPITAL LETTER ALPHA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FC2: // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1FCA; // GREEK CAPITAL LETTER ETA WITH VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FC3: // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI + elements[(*index)++] = 0x0397; // GREEK CAPITAL LETTER ETA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FC4: // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x0389; // GREEK CAPITAL LETTER ETA WITH TONOS (GREEK CAPITAL LETTER ETA TONOS) + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FC6: // GREEK SMALL LETTER ETA WITH PERISPOMENI + elements[(*index)++] = 0x0397; // GREEK CAPITAL LETTER ETA + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + return; + case 0x1FC7: // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI + 
elements[(*index)++] = 0x0397; // GREEK CAPITAL LETTER ETA + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FCC: // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + elements[(*index)++] = 0x0397; // GREEK CAPITAL LETTER ETA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FD2: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS) + elements[(*index)++] = 0x0300; // COMBINING GRAVE ACCENT (NON-SPACING GRAVE) + return; + case 0x1FD3: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS) + elements[(*index)++] = 0x0301; // COMBINING ACUTE ACCENT (NON-SPACING ACUTE) + return; + case 0x1FD6: // GREEK SMALL LETTER IOTA WITH PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + return; + case 0x1FD7: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS) + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + return; + case 0x1FE2: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA + elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON + elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS) + elements[(*index)++] = 0x0300; // COMBINING GRAVE ACCENT (NON-SPACING GRAVE) + return; + case 0x1FE3: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA + elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON + elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS) + 
elements[(*index)++] = 0x0301; // COMBINING ACUTE ACCENT (NON-SPACING ACUTE) + return; + case 0x1FE4: // GREEK SMALL LETTER RHO WITH PSILI + elements[(*index)++] = 0x03A1; // GREEK CAPITAL LETTER RHO + elements[(*index)++] = 0x0313; // COMBINING COMMA ABOVE (NON-SPACING COMMA ABOVE) + return; + case 0x1FE6: // GREEK SMALL LETTER UPSILON WITH PERISPOMENI + elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + return; + case 0x1FE7: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI + elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON + elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS) + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + return; + case 0x1FF2: // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x1FFA; // GREEK CAPITAL LETTER OMEGA WITH VARIA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FF3: // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI + elements[(*index)++] = 0x03A9; // GREEK CAPITAL LETTER OMEGA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FF4: // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI + elements[(*index)++] = 0x038F; // GREEK CAPITAL LETTER OMEGA WITH TONOS (GREEK CAPITAL LETTER OMEGA TONOS) + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FF6: // GREEK SMALL LETTER OMEGA WITH PERISPOMENI + elements[(*index)++] = 0x03A9; // GREEK CAPITAL LETTER OMEGA + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + return; + case 0x1FF7: // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI + elements[(*index)++] = 0x03A9; // GREEK CAPITAL LETTER OMEGA + elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0x1FFC: // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + 
elements[(*index)++] = 0x03A9; // GREEK CAPITAL LETTER OMEGA + elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA + return; + case 0xFB00: // LATIN SMALL LIGATURE FF + elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F + elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F + return; + case 0xFB01: // LATIN SMALL LIGATURE FI + elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F + elements[(*index)++] = 0x0049; // LATIN CAPITAL LETTER I + return; + case 0xFB02: // LATIN SMALL LIGATURE FL + elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F + elements[(*index)++] = 0x004C; // LATIN CAPITAL LETTER L + return; + case 0xFB03: // LATIN SMALL LIGATURE FFI + elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F + elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F + elements[(*index)++] = 0x0049; // LATIN CAPITAL LETTER I + return; + case 0xFB04: // LATIN SMALL LIGATURE FFL + elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F + elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F + elements[(*index)++] = 0x004C; // LATIN CAPITAL LETTER L + return; + case 0xFB05: // LATIN SMALL LIGATURE LONG S T + elements[(*index)++] = 0x0053; // LATIN CAPITAL LETTER S + elements[(*index)++] = 0x0054; // LATIN CAPITAL LETTER T + return; + case 0xFB06: // LATIN SMALL LIGATURE ST + elements[(*index)++] = 0x0053; // LATIN CAPITAL LETTER S + elements[(*index)++] = 0x0054; // LATIN CAPITAL LETTER T + return; + case 0xFB13: // ARMENIAN SMALL LIGATURE MEN NOW + elements[(*index)++] = 0x0544; // ARMENIAN CAPITAL LETTER MEN + elements[(*index)++] = 0x0546; // ARMENIAN CAPITAL LETTER NOW + return; + case 0xFB14: // ARMENIAN SMALL LIGATURE MEN ECH + elements[(*index)++] = 0x0544; // ARMENIAN CAPITAL LETTER MEN + elements[(*index)++] = 0x0535; // ARMENIAN CAPITAL LETTER ECH + return; + case 0xFB15: // ARMENIAN SMALL LIGATURE MEN INI + elements[(*index)++] = 0x0544; // ARMENIAN CAPITAL LETTER MEN + elements[(*index)++] = 0x053B; // ARMENIAN CAPITAL LETTER 
INI + return; + case 0xFB16: // ARMENIAN SMALL LIGATURE VEW NOW + elements[(*index)++] = 0x054E; // ARMENIAN CAPITAL LETTER VEW + elements[(*index)++] = 0x0546; // ARMENIAN CAPITAL LETTER NOW + return; + case 0xFB17: // ARMENIAN SMALL LIGATURE MEN XEH + elements[(*index)++] = 0x0544; // ARMENIAN CAPITAL LETTER MEN + elements[(*index)++] = 0x053D; // ARMENIAN CAPITAL LETTER XEH + return; + } + + MOZ_ASSERT_UNREACHABLE("Bad character input."); + return; +} diff --git a/js/src/vm/Unicode.h b/js/src/vm/Unicode.h index d8807a4deb..3f87e47fb6 100644 --- a/js/src/vm/Unicode.h +++ b/js/src/vm/Unicode.h @@ -62,8 +62,16 @@ namespace CharFlag { const uint8_t UNICODE_ID_CONTINUE = UNICODE_ID_START + UNICODE_ID_CONTINUE_ONLY; } +const char16_t NO_BREAK_SPACE = 0x00A0; +const char16_t MICRO_SIGN = 0x00B5; +const char16_t LATIN_SMALL_LETTER_SHARP_S = 0x00DF; +const char16_t LATIN_SMALL_LETTER_Y_WITH_DIAERESIS = 0x00FF; +const char16_t LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE = 0x0130; +const char16_t COMBINING_DOT_ABOVE = 0x0307; +const char16_t GREEK_CAPITAL_LETTER_SIGMA = 0x03A3; +const char16_t GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2; +const char16_t GREEK_SMALL_LETTER_SIGMA = 0x03C3; const char16_t BYTE_ORDER_MARK2 = 0xFFFE; -const char16_t NO_BREAK_SPACE = 0x00A0; const char16_t LeadSurrogateMin = 0xD800; const char16_t LeadSurrogateMax = 0xDBFF; @@ -239,6 +247,10 @@ IsSpaceOrBOM2(char16_t ch) return CharInfo(ch).isSpace(); } +/* + * Returns the simple upper case mapping (see CanUpperCaseSpecialCasing for + * details) of the given UTF-16 code unit. + */ inline char16_t ToUpperCase(char16_t ch) { @@ -253,6 +265,10 @@ ToUpperCase(char16_t ch) return uint16_t(ch) + info.upperCase; } +/* + * Returns the simple lower case mapping (see CanUpperCaseSpecialCasing for + * details) of the given UTF-16 code unit. 
+ */ inline char16_t ToLowerCase(char16_t ch) { @@ -330,6 +346,43 @@ ToLowerCaseNonBMPTrail(char16_t lead, char16_t trail) } /* + * Returns true if the given UTF-16 code unit has a language-independent, + * unconditional or conditional special upper case mapping. + * + * Unicode defines two case mapping modes: + * 1. "simple case mappings" for one-to-one mappings which are independent of + * context and language (defined in UnicodeData.txt). + * 2. "special case mappings" for mappings which can increase or decrease the + * string length; or are dependent on context or locale (defined in + * SpecialCasing.txt). + * + * The CanUpperCase() method defined above only supports simple case mappings. + * In order to support the full case mappings of all Unicode characters, + * callers need to check this method in addition to CanUpperCase(). + * + * NOTE: All special upper case mappings are unconditional in Unicode 9. + */ +bool +CanUpperCaseSpecialCasing(char16_t ch); + +/* + * Returns the length of the upper case mapping of |ch|. + * + * This function asserts if |ch| doesn't have a special upper case mapping. + */ +size_t +LengthUpperCaseSpecialCasing(char16_t ch); + +/* + * Appends the upper case mapping of |ch| to the given output buffer, + * starting at the provided index. + * + * This function asserts if |ch| doesn't have a special upper case mapping. + */ +void +AppendUpperCaseSpecialCasing(char16_t ch, char16_t* elements, size_t* index); + +/* * For a codepoint C, CodepointsWithSameUpperCaseInfo stores three offsets * from C to up to three codepoints with same uppercase (no codepoint in * UnicodeData.txt has more than three such codepoints). 
@@ -504,7 +557,7 @@ UTF16Encode(uint32_t codePoint, char16_t* lead, char16_t* trail) *trail = TrailSurrogate(codePoint); } -static inline void +inline void UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index) { if (!IsSupplementary(codePoint)) { diff --git a/js/src/vm/UnicodeNonBMP.h b/js/src/vm/UnicodeNonBMP.h index 687c1851e6..8b9fb2a4f2 100644 --- a/js/src/vm/UnicodeNonBMP.h +++ b/js/src/vm/UnicodeNonBMP.h @@ -19,6 +19,12 @@ // DIFF: the difference between the code point in the range and // converted code point +// U+10400 DESERET CAPITAL LETTER LONG I .. U+10427 DESERET CAPITAL LETTER EW +// U+104B0 OSAGE CAPITAL LETTER A .. U+104D3 OSAGE CAPITAL LETTER ZHA +// U+10C80 OLD HUNGARIAN CAPITAL LETTER A .. U+10CB2 OLD HUNGARIAN CAPITAL LETTER US +// U+118A0 WARANG CITI CAPITAL LETTER NGAA .. U+118BF WARANG CITI CAPITAL LETTER VIYO +// U+16E40 MEDEFAIDRIN CAPITAL LETTER M .. U+16E5F MEDEFAIDRIN CAPITAL LETTER Y +// U+1E900 ADLAM CAPITAL LETTER ALIF .. U+1E921 ADLAM CAPITAL LETTER SHA #define FOR_EACH_NON_BMP_LOWERCASE(macro) \ macro(0x10400, 0x10427, 0xd801, 0xdc00, 0xdc27, 40) \ macro(0x104b0, 0x104d3, 0xd801, 0xdcb0, 0xdcd3, 40) \ @@ -27,6 +33,12 @@ macro(0x16e40, 0x16e5f, 0xd81b, 0xde40, 0xde5f, 32) \ macro(0x1e900, 0x1e921, 0xd83a, 0xdd00, 0xdd21, 34) +// U+10428 DESERET SMALL LETTER LONG I .. U+1044F DESERET SMALL LETTER EW +// U+104D8 OSAGE SMALL LETTER A .. U+104FB OSAGE SMALL LETTER ZHA +// U+10CC0 OLD HUNGARIAN SMALL LETTER A .. U+10CF2 OLD HUNGARIAN SMALL LETTER US +// U+118C0 WARANG CITI SMALL LETTER NGAA .. U+118DF WARANG CITI SMALL LETTER VIYO +// U+16E60 MEDEFAIDRIN SMALL LETTER M .. U+16E7F MEDEFAIDRIN SMALL LETTER Y +// U+1E922 ADLAM SMALL LETTER ALIF .. 
U+1E943 ADLAM SMALL LETTER SHA #define FOR_EACH_NON_BMP_UPPERCASE(macro) \ macro(0x10428, 0x1044f, 0xd801, 0xdc28, 0xdc4f, -40) \ macro(0x104d8, 0x104fb, 0xd801, 0xdcd8, 0xdcfb, -40) \ @@ -35,6 +47,12 @@ macro(0x16e60, 0x16e7f, 0xd81b, 0xde60, 0xde7f, -32) \ macro(0x1e922, 0x1e943, 0xd83a, 0xdd22, 0xdd43, -34) +// U+10400 DESERET CAPITAL LETTER LONG I .. U+10427 DESERET CAPITAL LETTER EW +// U+104B0 OSAGE CAPITAL LETTER A .. U+104D3 OSAGE CAPITAL LETTER ZHA +// U+10C80 OLD HUNGARIAN CAPITAL LETTER A .. U+10CB2 OLD HUNGARIAN CAPITAL LETTER US +// U+118A0 WARANG CITI CAPITAL LETTER NGAA .. U+118BF WARANG CITI CAPITAL LETTER VIYO +// U+16E40 MEDEFAIDRIN CAPITAL LETTER M .. U+16E5F MEDEFAIDRIN CAPITAL LETTER Y +// U+1E900 ADLAM CAPITAL LETTER ALIF .. U+1E921 ADLAM CAPITAL LETTER SHA #define FOR_EACH_NON_BMP_CASE_FOLDING(macro) \ macro(0x10400, 0x10427, 0xd801, 0xdc00, 0xdc27, 40) \ macro(0x104b0, 0x104d3, 0xd801, 0xdcb0, 0xdcd3, 40) \ @@ -43,6 +61,12 @@ macro(0x16e40, 0x16e5f, 0xd81b, 0xde40, 0xde5f, 32) \ macro(0x1e900, 0x1e921, 0xd83a, 0xdd00, 0xdd21, 34) +// U+10428 DESERET SMALL LETTER LONG I .. U+1044F DESERET SMALL LETTER EW +// U+104D8 OSAGE SMALL LETTER A .. U+104FB OSAGE SMALL LETTER ZHA +// U+10CC0 OLD HUNGARIAN SMALL LETTER A .. U+10CF2 OLD HUNGARIAN SMALL LETTER US +// U+118C0 WARANG CITI SMALL LETTER NGAA .. U+118DF WARANG CITI SMALL LETTER VIYO +// U+16E60 MEDEFAIDRIN SMALL LETTER M .. U+16E7F MEDEFAIDRIN SMALL LETTER Y +// U+1E922 ADLAM SMALL LETTER ALIF .. 
U+1E943 ADLAM SMALL LETTER SHA #define FOR_EACH_NON_BMP_REV_CASE_FOLDING(macro) \ macro(0x10428, 0x1044f, 0xd801, 0xdc28, 0xdc4f, -40) \ macro(0x104d8, 0x104fb, 0xd801, 0xdcd8, 0xdcfb, -40) \ diff --git a/js/src/vm/make_unicode.py b/js/src/vm/make_unicode.py index b55b1940e4..8568ccb64c 100755 --- a/js/src/vm/make_unicode.py +++ b/js/src/vm/make_unicode.py @@ -26,6 +26,18 @@ import re import os import sys from contextlib import closing +from functools import partial +from itertools import chain, groupby, ifilter, imap, izip_longest, tee +from operator import is_not, itemgetter + +class codepoint_dict(dict): + def name(self, code_point): + (_, _, name, alias) = self[code_point] + return '{}{}'.format(name, (' (' + alias + ')' if alias else '')) + + def full_name(self, code_point): + (_, _, name, alias) = self[code_point] + return 'U+{:04X} {}{}'.format(code_point, name, (' (' + alias + ')' if alias else '')) # ECMAScript 2016 # §11.2 White Space @@ -132,10 +144,32 @@ def read_derived_core_properties(derived_core_properties): for char in range(int(start, 16), int(end, 16) + 1): yield (char, char_property) +def read_special_casing(special_casing): + # Format: + # <code>; <lower>; <title>; <upper>; (<condition_list>;)? # <comment> + for line in special_casing: + if line == '\n' or line.startswith('#'): + continue + row = line.split('#')[0].split(';') + code = int(row[0].strip(), 16) + lower = row[1].strip() + lower = [int(c, 16) for c in lower.split(' ')] if lower else [] + upper = row[3].strip() + upper = [int(c, 16) for c in upper.split(' ')] if upper else [] + languages = [] + contexts = [] + condition = row[4].strip() + if condition: + for cond in condition.split(' '): + if cond[0].islower(): + languages.append(cond) + else: + contexts.append(cond) + pass + yield (code, lower, upper, languages, contexts) + def int_ranges(ints): """ Yields consecutive ranges (inclusive) from integer values. 
""" - from itertools import tee, izip_longest - (a, b) = tee(sorted(ints)) start = next(b) for (curr, succ) in izip_longest(a, b): @@ -153,7 +187,7 @@ def utf16_encode(code): return lead, trail -def make_non_bmp_convert_macro(out_file, name, convert_map): +def make_non_bmp_convert_macro(out_file, name, convert_map, codepoint_table): # Find continuous range in convert_map. convert_list = [] entry = None @@ -179,6 +213,7 @@ def make_non_bmp_convert_macro(out_file, name, convert_map): # Generate macro call for each range. lines = [] + comment = [] for entry in convert_list: from_code = entry['code'] to_code = entry['code'] + entry['length'] - 1 @@ -190,29 +225,15 @@ def make_non_bmp_convert_macro(out_file, name, convert_map): lines.append(' macro(0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}, {:d})'.format( from_code, to_code, lead, from_trail, to_trail, diff)) + comment.append('// {} .. {}'.format(codepoint_table.full_name(from_code), + codepoint_table.full_name(to_code))) + out_file.write('\n'.join(comment)) + out_file.write('\n') out_file.write('#define FOR_EACH_NON_BMP_{}(macro) \\\n'.format(name)) out_file.write(' \\\n'.join(lines)) out_file.write('\n') -def for_each_non_bmp_group(group_set): - # Find continuous range in group_set. 
- group_list = [] - entry = None - for code in sorted(group_set.keys()): - if entry and code == entry['code'] + entry['length']: - entry['length'] += 1 - continue - - entry = { - 'code': code, - 'length': 1 - } - group_list.append(entry) - - for entry in group_list: - yield (entry['code'], entry['code'] + entry['length'] - 1) - def process_derived_core_properties(derived_core_properties): id_start = set() id_continue = set() @@ -236,7 +257,7 @@ def process_unicode_data(unicode_data, derived_core_properties): same_upper_cache = {same_upper_dummy: 0} same_upper_index = [0] * (MAX_BMP + 1) - test_table = {} + codepoint_table = codepoint_dict() test_space_table = [] non_bmp_lower_map = {} @@ -254,15 +275,9 @@ def process_unicode_data(unicode_data, derived_core_properties): alias = row[-5] uppercase = row[-3] lowercase = row[-2] - flags = 0 if uppercase: upper = int(uppercase, 16) - - if upper not in same_upper_map: - same_upper_map[upper] = [code] - else: - same_upper_map[upper].append(code) else: upper = code @@ -271,6 +286,8 @@ def process_unicode_data(unicode_data, derived_core_properties): else: lower = code + codepoint_table[code] = (upper, lower, name, alias) + if code > MAX_BMP: if code != lower: non_bmp_lower_map[code] = lower @@ -285,6 +302,16 @@ def process_unicode_data(unicode_data, derived_core_properties): non_bmp_id_cont_set[code] = 1 continue + assert lower <= MAX_BMP and upper <= MAX_BMP + + if code != upper: + if upper not in same_upper_map: + same_upper_map[upper] = [code] + else: + same_upper_map[upper].append(code) + + flags = 0 + # we combine whitespace and lineterminators because in practice we don't need them separated if category == 'Zs' or code in whitespace or code in line_terminator: flags |= FLAG_SPACE @@ -298,8 +325,6 @@ def process_unicode_data(unicode_data, derived_core_properties): elif code in id_continue or code in compatibility_identifier_part: flags |= FLAG_UNICODE_ID_CONTINUE_ONLY - test_table[code] = (upper, lower, name, alias) - 
up_d = upper - code low_d = lower - code @@ -319,12 +344,12 @@ def process_unicode_data(unicode_data, derived_core_properties): index[code] = i for code in range(0, MAX_BMP + 1): - entry = test_table.get(code) + entry = codepoint_table.get(code) if not entry: continue - (upper, lower, name, alias) = entry + (upper, _, _, _) = entry if upper not in same_upper_map: continue @@ -354,7 +379,7 @@ def process_unicode_data(unicode_data, derived_core_properties): non_bmp_lower_map, non_bmp_upper_map, non_bmp_space_set, non_bmp_id_start_set, non_bmp_id_cont_set, - test_table, test_space_table, + codepoint_table, test_space_table, ) def process_case_folding(case_folding): @@ -438,9 +463,149 @@ def process_case_folding(case_folding): folding_tests ) +def process_special_casing(special_casing, table, index): + # Unconditional special casing. + unconditional_tolower = {} + unconditional_toupper = {} + + # Conditional special casing, language independent. + conditional_tolower = {} + conditional_toupper = {} + + # Conditional special casing, language dependent. + lang_conditional_tolower = {} + lang_conditional_toupper = {} + + def caseInfo(code): + (upper, lower, flags) = table[index[code]] + return ((code + lower) & 0xffff, (code + upper) & 0xffff) + + for (code, lower, upper, languages, contexts) in read_special_casing(special_casing): + assert code <= MAX_BMP, 'Unexpected character outside of BMP: %s' % code + assert len(languages) <= 1, 'Expected zero or one language ids: %s' % languages + assert len(contexts) <= 1, 'Expected zero or one casing contexts: %s' % languages + + (default_lower, default_upper) = caseInfo(code) + special_lower = len(lower) != 1 or lower[0] != default_lower + special_upper = len(upper) != 1 or upper[0] != default_upper + + # Invariant: If |code| has casing per UnicodeData.txt, then it also has + # casing rules in SpecialCasing.txt. 
+ assert code == default_lower or len(lower) != 1 or code != lower[0] + assert code == default_upper or len(upper) != 1 or code != upper[0] + + language = languages[0] if languages else None + context = contexts[0] if contexts else None + + if not language and not context: + if special_lower: + unconditional_tolower[code] = lower + if special_upper: + unconditional_toupper[code] = upper + elif not language and context: + if special_lower: + conditional_tolower[code] = (lower, context) + if special_upper: + conditional_toupper[code] = (upper, context) + else: + if language not in lang_conditional_tolower: + lang_conditional_tolower[language] = {} + lang_conditional_toupper[language] = {} + if special_lower: + lang_conditional_tolower[language][code] = (lower, context) + if special_upper: + lang_conditional_toupper[language][code] = (upper, context) + + # Certain special casing rules are inlined in jsstr.cpp, ensure these cases + # still match the current SpecialCasing.txt file. + def lowerCase(code): + (lower, _) = caseInfo(code) + return lower + + def upperCase(code): + (_, upper) = caseInfo(code) + return upper + + def ascii(char_dict): + return ifilter(lambda ch: ch <= 0x7f, char_dict.iterkeys()) + + def latin1(char_dict): + return ifilter(lambda ch: ch <= 0xff, char_dict.iterkeys()) + + def is_empty(iterable): + return not any(True for _ in iterable) + + def is_equals(iter1, iter2): + return all(x == y for (x, y) in izip_longest(iter1, iter2)) + + # Ensure no ASCII characters have special case mappings. + assert is_empty(ascii(unconditional_tolower)) + assert is_empty(ascii(unconditional_toupper)) + assert is_empty(ascii(conditional_tolower)) + assert is_empty(ascii(conditional_toupper)) + + # Ensure no Latin1 characters have special lower case mappings. + assert is_empty(latin1(unconditional_tolower)) + assert is_empty(latin1(conditional_tolower)) + + # Ensure no Latin1 characters have conditional special upper case mappings. 
+ assert is_empty(latin1(conditional_toupper)) + + # Ensure U+00DF is the only Latin1 character with a special upper case mapping. + assert is_equals([0x00DF], latin1(unconditional_toupper)) + + # Ensure U+0130 is the only character with a special lower case mapping. + assert is_equals([0x0130], unconditional_tolower) + + # Ensure no characters have language independent conditional upper case mappings. + assert is_empty(conditional_toupper) + + # Ensure U+03A3 is the only character with language independent conditional lower case mapping. + assert is_equals([0x03A3], conditional_tolower) + + # Verify U+0130 and U+03A3 have simple lower case mappings. + assert all(ch != lowerCase(ch) for ch in [0x0130, 0x03A3]) + + # Ensure Azeri, Lithuanian, and Turkish are the only languages with conditional case mappings. + assert is_equals(["az", "lt", "tr"], sorted(lang_conditional_tolower.iterkeys())) + assert is_equals(["az", "lt", "tr"], sorted(lang_conditional_toupper.iterkeys())) + + # Maximum case mapping length is three characters. + itervals = lambda d: d.itervalues() + assert max(imap(len, chain( + itervals(unconditional_tolower), + itervals(unconditional_toupper), + imap(itemgetter(0), itervals(conditional_tolower)), + imap(itemgetter(0), itervals(conditional_toupper)), + imap(itemgetter(0), chain.from_iterable(imap(itervals, itervals(lang_conditional_tolower)))), + imap(itemgetter(0), chain.from_iterable(imap(itervals, itervals(lang_conditional_toupper)))), + ))) <= 3 + + # Ensure all case mapping contexts are known (see Unicode 9.0, §3.13 Default Case Algorithms). 
+ assert set([ + 'After_I', 'After_Soft_Dotted', 'Final_Sigma', 'More_Above', 'Not_Before_Dot', + ]).issuperset(set(ifilter(partial(is_not, None), chain( + imap(itemgetter(1), itervals(conditional_tolower)), + imap(itemgetter(1), itervals(conditional_toupper)), + imap(itemgetter(1), chain.from_iterable(imap(itervals, itervals(lang_conditional_tolower)))), + imap(itemgetter(1), chain.from_iterable(imap(itervals, itervals(lang_conditional_toupper)))), + )))) + + # Special casing for U+00DF (LATIN SMALL LETTER SHARP S). + assert upperCase(0x00DF) == 0x00DF and unconditional_toupper[0x00DF] == [0x0053, 0x0053]; + + # Special casing for U+0130 (LATIN CAPITAL LETTER I WITH DOT ABOVE). + assert unconditional_tolower[0x0130] == [0x0069, 0x0307] + + # Special casing for U+03A3 (GREEK CAPITAL LETTER SIGMA). + assert lowerCase(0x03A3) == 0x03C3 and conditional_tolower[0x03A3] == ([0x03C2], 'Final_Sigma'); + + return (unconditional_tolower, unconditional_toupper) + def make_non_bmp_file(version, non_bmp_lower_map, non_bmp_upper_map, - non_bmp_folding_map, non_bmp_rev_folding_map): + non_bmp_folding_map, non_bmp_rev_folding_map, + codepoint_table): file_name = 'UnicodeNonBMP.h'; with io.open(file_name, mode='wb') as non_bmp_file: non_bmp_file.write(mpl_license) @@ -463,77 +628,277 @@ def make_non_bmp_file(version, """) - make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map) + make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map, codepoint_table) non_bmp_file.write('\n') - make_non_bmp_convert_macro(non_bmp_file, 'UPPERCASE', non_bmp_upper_map) + make_non_bmp_convert_macro(non_bmp_file, 'UPPERCASE', non_bmp_upper_map, codepoint_table) non_bmp_file.write('\n') - make_non_bmp_convert_macro(non_bmp_file, 'CASE_FOLDING', non_bmp_folding_map) + make_non_bmp_convert_macro(non_bmp_file, 'CASE_FOLDING', non_bmp_folding_map, codepoint_table) non_bmp_file.write('\n') - make_non_bmp_convert_macro(non_bmp_file, 'REV_CASE_FOLDING', 
non_bmp_rev_folding_map) + make_non_bmp_convert_macro(non_bmp_file, 'REV_CASE_FOLDING', non_bmp_rev_folding_map, codepoint_table) non_bmp_file.write(""" #endif /* vm_UnicodeNonBMP_h */ """) -def make_bmp_mapping_test(version, test_table): +def write_special_casing_methods(unconditional_toupper, codepoint_table, println): + def hexlit(n): + """ Returns C++ hex-literal for |n|. """ + return '0x{:04X}'.format(n) + + def describe_range(ranges, depth): + indent = depth * ' ' + for (start, end) in ranges: + if start == end: + println(indent, '// {}'.format(codepoint_table.full_name(start))) + else: + println(indent, '// {} .. {}'.format(codepoint_table.full_name(start), + codepoint_table.full_name(end))) + + def out_range(start, end): + """ Tests if the input character isn't a member of the set {x | start <= x <= end}. """ + if (start == end): + return 'ch != {}'.format(hexlit(start)) + return 'ch < {} || ch > {}'.format(hexlit(start), hexlit(end)) + + def in_range(start, end, parenthesize=False): + """ Tests if the input character is in the set {x | start <= x <= end}. """ + if (start == end): + return 'ch == {}'.format(hexlit(start)) + (left, right) = ('(', ')') if parenthesize else ('', '') + return '{}ch >= {} && ch <= {}{}'.format(left, hexlit(start), hexlit(end), right) + + def in_any_range(ranges, spaces): + """ Tests if the input character is included in any of the given ranges. """ + lines = [[]] + for (start, end) in ranges: + expr = in_range(start, end, parenthesize=True) + line = ' || '.join(lines[-1] + [expr]) + if len(line) < (100 - len(spaces) - len(' ||')): + lines[-1].append(expr) + else: + lines.append([expr]) + return ' ||\n{}'.format(spaces).join(imap(lambda t: ' || '.join(t), lines)) + + def write_range_accept(parent_list, child_list, depth): + """ Accepts the input character if it matches any code unit in |child_list|. 
""" + (min_parent, max_parent) = (parent_list[0], parent_list[-1]) + (min_child, max_child) = (child_list[0], child_list[-1]) + assert min_child >= min_parent + assert max_child <= max_parent + indent = depth * ' ' + + child_ranges = list(int_ranges(child_list)) + has_successor = max_child != max_parent + + # If |child_list| is a contiguous list of code units, emit a simple + # range check: |min_child <= input <= max_child|. + if len(child_ranges) == 1: + describe_range(child_ranges, depth) + if has_successor: + println(indent, 'if (ch <= {})'.format(hexlit(max_child))) + println(indent, ' return ch >= {};'.format(hexlit(min_child))) + else: + println(indent, 'return {};'.format(in_range(min_child, max_child))) + return + + # Otherwise create a disjunction over the subranges in |child_ranges|. + if not has_successor: + spaces = indent + len('return ') * ' ' + else: + spaces = indent + len(' return ') * ' ' + range_test_expr = in_any_range(child_ranges, spaces) + + if min_child != min_parent: + println(indent, 'if (ch < {})'.format(hexlit(min_child))) + println(indent, ' return false;') + + # If there's no successor block, we can omit the |input <= max_child| check, + # because it was already checked when we emitted the parent range test. + if not has_successor: + describe_range(child_ranges, depth) + println(indent, 'return {};'.format(range_test_expr)) + else: + println(indent, 'if (ch <= {}) {{'.format(hexlit(max_child))) + describe_range(child_ranges, depth + 1) + println(indent, ' return {};'.format(range_test_expr)) + println(indent, '}') + + def write_CanUpperCaseSpecialCasing(): + """ Checks if the input has a special upper case mapping. """ + println('bool') + println('js::unicode::CanUpperCaseSpecialCasing(char16_t ch)') + println('{') + + assert unconditional_toupper, "|unconditional_toupper| is not empty" + + # Sorted list of code units with special upper case mappings. 
+ code_list = sorted(unconditional_toupper.iterkeys()) + + # Fail-fast if the input character isn't a special casing character. + println(' if ({})'.format(out_range(code_list[0], code_list[-1]))) + println(' return false;') + + for i in range(0, 16): + # Check if the input character is in the range: + # |start_point <= input < end_point|. + start_point = i << 12 + end_point = (i + 1) << 12 + matches = [cu for cu in code_list if start_point <= cu < end_point] + + # Skip empty ranges. + if not matches: + continue + + # If |matches| consists of only a few characters, directly check + # the input against the characters in |matches|. + if len(matches) <= 8: + write_range_accept(code_list, matches, depth=1) + continue + + # Otherwise split into further subranges. + + # Only enter the if-block if the input is less than or equal to the + # largest value in the current range. + is_last_block = matches[-1] == code_list[-1] + if not is_last_block: + println(' if (ch <= {}) {{'.format(hexlit(matches[-1]))) + else: + println(' if (ch < {})'.format(hexlit(matches[0]))) + println(' return false;') + + for j in range(0, 16): + inner_start = start_point + (j << 8) + inner_end = start_point + ((j + 1) << 8) + inner_matches = [cu for cu in matches if inner_start <= cu < inner_end] + + if inner_matches: + d = 1 if is_last_block else 2 + write_range_accept(matches, inner_matches, depth=d) + + if not is_last_block: + println(' }') + + println('}') + + def write_LengthUpperCaseSpecialCasing(): + """ Slow case: Special casing character was found, returns its mapping length. 
""" + println('size_t') + println('js::unicode::LengthUpperCaseSpecialCasing(char16_t ch)') + println('{') + + println(' switch(ch) {') + for (code, converted) in sorted(unconditional_toupper.iteritems(), key=itemgetter(0)): + println(' case {}: return {}; // {}'.format(hexlit(code), len(converted), + codepoint_table.name(code))) + println(' }') + println('') + println(' MOZ_ASSERT_UNREACHABLE("Bad character input.");') + println(' return 0;') + + println('}') + + def write_AppendUpperCaseSpecialCasing(): + """ Slow case: Special casing character was found, append its mapping characters. """ + println('void') + println('js::unicode::AppendUpperCaseSpecialCasing(char16_t ch, char16_t* elements, size_t* index)') + println('{') + + println(' switch(ch) {') + for (code, converted) in sorted(unconditional_toupper.iteritems(), key=itemgetter(0)): + println(' case {}: // {}'.format(hexlit(code), codepoint_table.name(code))) + for ch in converted: + println(' elements[(*index)++] = {}; // {}'.format(hexlit(ch), + codepoint_table.name(ch))) + println(' return;') + println(' }') + println('') + println(' MOZ_ASSERT_UNREACHABLE("Bad character input.");') + println(' return;') + + println('}') + + write_CanUpperCaseSpecialCasing() + println('') + write_LengthUpperCaseSpecialCasing() + println('') + write_AppendUpperCaseSpecialCasing() + +def make_bmp_mapping_test(version, codepoint_table, unconditional_tolower, unconditional_toupper): + def unicodeEsc(n): + return '\u{:04X}'.format(n) + file_name = '../tests/ecma_5/String/string-upper-lower-mapping.js' - with io.open(file_name, mode='wb') as test_mapping: - test_mapping.write(warning_message) - test_mapping.write(unicode_version_message.format(version)) - test_mapping.write(public_domain) - test_mapping.write('var mapping = [\n') + with io.open(file_name, mode='wb') as output: + write = partial(print, file=output, sep='', end='') + println = partial(print, file=output, sep='', end='\n') + + write(warning_message) + 
write(unicode_version_message.format(version)) + write(public_domain) + println('var mapping = [') for code in range(0, MAX_BMP + 1): - entry = test_table.get(code) + entry = codepoint_table.get(code) if entry: - (upper, lower, name, alias) = entry - test_mapping.write(' [' + hex(upper) + ', ' + hex(lower) + '], /* ' + - name + (' (' + alias + ')' if alias else '') + ' */\n') + (upper, lower, _, _) = entry + upper = unconditional_toupper[code] if code in unconditional_toupper else [upper] + lower = unconditional_tolower[code] if code in unconditional_tolower else [lower] + println(' ["{}", "{}"], /* {} */'.format("".join(imap(unicodeEsc, upper)), + "".join(imap(unicodeEsc, lower)), + codepoint_table.name(code))) else: - test_mapping.write(' [' + hex(code) + ', ' + hex(code) + '],\n') - test_mapping.write('];') - test_mapping.write(""" + println(' ["{0}", "{0}"],'.format(unicodeEsc(code))) + println('];') + write(""" assertEq(mapping.length, 0x10000); for (var i = 0; i <= 0xffff; i++) { var char = String.fromCharCode(i); var info = mapping[i]; - assertEq(char.toUpperCase().charCodeAt(0), info[0]); - assertEq(char.toLowerCase().charCodeAt(0), info[1]); + assertEq(char.toUpperCase(), info[0]); + assertEq(char.toLowerCase(), info[1]); } if (typeof reportCompare === "function") reportCompare(true, true); """) -def make_non_bmp_mapping_test(version, non_bmp_upper_map, non_bmp_lower_map): +def make_non_bmp_mapping_test(version, non_bmp_upper_map, non_bmp_lower_map, codepoint_table): file_name = '../tests/ecma_6/String/string-code-point-upper-lower-mapping.js' with io.open(file_name, mode='wb') as test_non_bmp_mapping: test_non_bmp_mapping.write(warning_message) test_non_bmp_mapping.write(unicode_version_message.format(version)) test_non_bmp_mapping.write(public_domain) + for code in sorted(non_bmp_upper_map.keys()): test_non_bmp_mapping.write("""\ -assertEq(String.fromCodePoint(0x{:x}).toUpperCase().codePointAt(0), 0x{:x}); -""".format(code, non_bmp_upper_map[code])) 
+assertEq(String.fromCodePoint(0x{:04X}).toUpperCase().codePointAt(0), 0x{:04X}); // {}, {} +""".format(code, non_bmp_upper_map[code], + codepoint_table.name(code), codepoint_table.name(non_bmp_upper_map[code]))) + for code in sorted(non_bmp_lower_map.keys()): test_non_bmp_mapping.write("""\ -assertEq(String.fromCodePoint(0x{:x}).toLowerCase().codePointAt(0), 0x{:x}); -""".format(code, non_bmp_lower_map[code])) +assertEq(String.fromCodePoint(0x{:04X}).toLowerCase().codePointAt(0), 0x{:04X}); // {}, {} +""".format(code, non_bmp_lower_map[code], + codepoint_table.name(code), codepoint_table.name(non_bmp_lower_map[code]))) test_non_bmp_mapping.write(""" if (typeof reportCompare === "function") reportCompare(true, true); """) -def make_space_test(version, test_space_table): +def make_space_test(version, test_space_table, codepoint_table): + def hex_and_name(c): + return ' 0x{:04X} /* {} */'.format(c, codepoint_table.name(c)) + file_name = '../tests/ecma_5/String/string-space-trim.js' with io.open(file_name, mode='wb') as test_space: test_space.write(warning_message) test_space.write(unicode_version_message.format(version)) test_space.write(public_domain) - test_space.write('var onlySpace = String.fromCharCode(' + - ', '.join(map(lambda c: hex(c), test_space_table)) + ');\n') + test_space.write('var onlySpace = String.fromCharCode(\n') + test_space.write(',\n'.join(map(hex_and_name, test_space_table))) + test_space.write('\n);\n') test_space.write(""" assertEq(onlySpace.trim(), ""); assertEq((onlySpace + 'aaaa').trim(), 'aaaa'); @@ -544,7 +909,10 @@ if (typeof reportCompare === "function") reportCompare(true, true); """) -def make_icase_test(version, folding_tests): +def make_icase_test(version, folding_tests, codepoint_table): + def char_hex(c): + return '0x{:04X}'.format(c) + file_name = '../tests/ecma_6/RegExp/unicode-ignoreCase.js' with io.open(file_name, mode='wb') as test_icase: test_icase.write(warning_message) @@ -565,7 +933,8 @@ function test(code, ...equivs) { 
} """) for args in folding_tests: - test_icase.write('test(' + ','.join([hex(c) for c in args]) + ');\n') + test_icase.write('test({}); // {}\n'.format(', '.join(map(char_hex, args)), + ', '.join(map(codepoint_table.name, args)))) test_icase.write(""" if (typeof reportCompare === "function") reportCompare(true, true); @@ -576,7 +945,9 @@ def make_unicode_file(version, same_upper_table, same_upper_index, folding_table, folding_index, non_bmp_space_set, - non_bmp_id_start_set, non_bmp_id_cont_set): + non_bmp_id_start_set, non_bmp_id_cont_set, + unconditional_toupper, + codepoint_table): index1, index2, shift = splitbins(index) # Don't forget to update CharInfo in Unicode.h if you need to change this @@ -665,8 +1036,8 @@ def make_unicode_file(version, * stop if you found the best shift */ """ - def dump(data, name, file): - file.write('const uint8_t unicode::' + name + '[] = {\n') + def dump(data, name, println): + println('const uint8_t unicode::{}[] = {{'.format(name)) line = pad = ' ' * 4 lines = [] @@ -682,93 +1053,79 @@ def make_unicode_file(version, line = line + s + ', ' lines.append(line.rstrip()) - file.write('\n'.join(lines)) - file.write('\n};\n') + println('\n'.join(lines)) + println('};') + + def write_table(data_type, name, tbl, idx1_name, idx1, idx2_name, idx2, println): + println('const {} unicode::{}[] = {{'.format(data_type, name)) + for d in tbl: + println(' {{ {} }},'.format(', '.join(str(e) for e in d))) + println('};') + println('') + + dump(idx1, idx1_name, println) + println('') + dump(idx2, idx2_name, println) + println('') + + def write_supplemental_identifier_method(name, group_set, println): + println('bool') + println('js::unicode::{}(uint32_t codePoint)'.format(name)) + println('{') + for (from_code, to_code) in int_ranges(group_set.keys()): + println(' if (codePoint >= 0x{:X} && codePoint <= 0x{:X}) // {} .. 
{}'.format(from_code, + to_code, + codepoint_table.name(from_code), + codepoint_table.name(to_code))) + println(' return true;') + println(' return false;') + println('}') + println('') file_name = 'Unicode.cpp' with io.open(file_name, 'wb') as data_file: - data_file.write(warning_message) - data_file.write(unicode_version_message.format(version)) - data_file.write(public_domain) - data_file.write('#include "vm/Unicode.h"\n\n') - data_file.write('using namespace js;\n') - data_file.write('using namespace js::unicode;\n') - data_file.write(comment) - data_file.write('const CharacterInfo unicode::js_charinfo[] = {\n') - for d in table: - data_file.write(' {') - data_file.write(', '.join((str(e) for e in d))) - data_file.write('},\n') - data_file.write('};\n') - data_file.write('\n') - - dump(index1, 'index1', data_file) - data_file.write('\n') - dump(index2, 'index2', data_file) - data_file.write('\n') - - data_file.write('const CodepointsWithSameUpperCaseInfo unicode::js_codepoints_with_same_upper_info[] = {\n') - for d in same_upper_table: - data_file.write(' {') - data_file.write(', '.join((str(e) for e in d))) - data_file.write('},\n') - data_file.write('};\n') - data_file.write('\n') - - dump(same_upper_index1, 'codepoints_with_same_upper_index1', data_file) - data_file.write('\n') - dump(same_upper_index2, 'codepoints_with_same_upper_index2', data_file) - data_file.write('\n') - - data_file.write('const FoldingInfo unicode::js_foldinfo[] = {\n') - for d in folding_table: - data_file.write(' {') - data_file.write(', '.join((str(e) for e in d))) - data_file.write('},\n') - data_file.write('};\n') - data_file.write('\n') - - dump(folding_index1, 'folding_index1', data_file) - data_file.write('\n') - dump(folding_index2, 'folding_index2', data_file) - data_file.write('\n') + write = partial(print, file=data_file, sep='', end='') + println = partial(print, file=data_file, sep='', end='\n') + + write(warning_message) + write(unicode_version_message.format(version)) + 
write(public_domain) + println('#include "vm/Unicode.h"') + println('') + println('using namespace js;') + println('using namespace js::unicode;') + write(comment) + + write_table('CharacterInfo', + 'js_charinfo', table, + 'index1', index1, + 'index2', index2, + println) + + write_table('CodepointsWithSameUpperCaseInfo', + 'js_codepoints_with_same_upper_info', same_upper_table, + 'codepoints_with_same_upper_index1', same_upper_index1, + 'codepoints_with_same_upper_index2', same_upper_index2, + println) + + write_table('FoldingInfo', + 'js_foldinfo', folding_table, + 'folding_index1', folding_index1, + 'folding_index2', folding_index2, + println) # If the following assert fails, it means space character is added to # non-BMP area. In that case the following code should be uncommented # and the corresponding code should be added to frontend. assert len(non_bmp_space_set.keys()) == 0 - data_file.write("""\ -bool -js::unicode::IsIdentifierStartNonBMP(uint32_t codePoint) -{ -""") - - for (from_code, to_code) in for_each_non_bmp_group(non_bmp_id_start_set): - data_file.write("""\ - if (codePoint >= 0x{:x} && codePoint <= 0x{:x}) - return true; -""".format(from_code, to_code)) - - data_file.write("""\ - return false; -} - -bool -js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint) -{ -""") + write_supplemental_identifier_method('IsIdentifierStartNonBMP', non_bmp_id_start_set, + println) - for (from_code, to_code) in for_each_non_bmp_group(non_bmp_id_cont_set): - data_file.write("""\ - if (codePoint >= 0x{:x} && codePoint <= 0x{:x}) - return true; -""".format(from_code, to_code)) + write_supplemental_identifier_method('IsIdentifierPartNonBMP', non_bmp_id_cont_set, + println) - data_file.write("""\ - return false; -} -""") + write_special_casing_methods(unconditional_toupper, codepoint_table, println) def getsize(data): """ return smallest possible integer size for the given array """ @@ -842,10 +1199,8 @@ def splitbins(t): def make_irregexp_tables(version, table, index, 
folding_table, folding_index, - test_table): + codepoint_table): import string - from functools import partial - from itertools import chain, ifilter, imap MAX_ASCII = 0x7F MAX_LATIN1 = 0xFF @@ -894,13 +1249,13 @@ def make_irregexp_tables(version, def char_name(code): assert 0 <= code and code <= MAX_BMP - if code not in test_table: + if code not in codepoint_table: return '<Unused>' if code == LEAD_SURROGATE_MIN: return '<Lead Surrogate Min>' if code == TRAIL_SURROGATE_MAX: return '<Trail Surrogate Max>' - (_, _, name, alias) = test_table[code] + (_, _, name, alias) = codepoint_table[code] return name if not name.startswith('<') else alias def write_character_range(println, name, characters): @@ -1080,7 +1435,8 @@ def update_unicode(args): with download_or_open('UnicodeData.txt') as unicode_data, \ download_or_open('CaseFolding.txt') as case_folding, \ - download_or_open('DerivedCoreProperties.txt') as derived_core_properties: + download_or_open('DerivedCoreProperties.txt') as derived_core_properties, \ + download_or_open('SpecialCasing.txt') as special_casing: unicode_version = version_from_file(derived_core_properties, 'DerivedCoreProperties') print('Processing...') @@ -1090,13 +1446,16 @@ def update_unicode(args): non_bmp_lower_map, non_bmp_upper_map, non_bmp_space_set, non_bmp_id_start_set, non_bmp_id_cont_set, - test_table, test_space_table + codepoint_table, test_space_table ) = process_unicode_data(unicode_data, derived_core_properties) ( folding_table, folding_index, non_bmp_folding_map, non_bmp_rev_folding_map, folding_tests ) = process_case_folding(case_folding) + ( + unconditional_tolower, unconditional_toupper + ) = process_special_casing(special_casing, table, index) print('Generating...') make_unicode_file(unicode_version, @@ -1104,19 +1463,23 @@ def update_unicode(args): same_upper_table, same_upper_index, folding_table, folding_index, non_bmp_space_set, - non_bmp_id_start_set, non_bmp_id_cont_set) + non_bmp_id_start_set, non_bmp_id_cont_set, + 
unconditional_toupper, + codepoint_table) make_non_bmp_file(unicode_version, non_bmp_lower_map, non_bmp_upper_map, - non_bmp_folding_map, non_bmp_rev_folding_map) + non_bmp_folding_map, non_bmp_rev_folding_map, + codepoint_table) make_irregexp_tables(unicode_version, table, index, folding_table, folding_index, - test_table) + codepoint_table) - make_bmp_mapping_test(unicode_version, test_table) - make_non_bmp_mapping_test(unicode_version, non_bmp_upper_map, non_bmp_lower_map) - make_space_test(unicode_version, test_space_table) - make_icase_test(unicode_version, folding_tests) + make_bmp_mapping_test(unicode_version, + codepoint_table, unconditional_tolower, unconditional_toupper) + make_non_bmp_mapping_test(unicode_version, non_bmp_upper_map, non_bmp_lower_map, codepoint_table) + make_space_test(unicode_version, test_space_table, codepoint_table) + make_icase_test(unicode_version, folding_tests, codepoint_table) if __name__ == '__main__': import argparse |