summaryrefslogtreecommitdiff
path: root/js/src/builtin/intl/CommonFunctions.js
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/builtin/intl/CommonFunctions.js')
-rw-r--r--js/src/builtin/intl/CommonFunctions.js1703
1 files changed, 1190 insertions, 513 deletions
diff --git a/js/src/builtin/intl/CommonFunctions.js b/js/src/builtin/intl/CommonFunctions.js
index cf5a615721..36b2bec9b2 100644
--- a/js/src/builtin/intl/CommonFunctions.js
+++ b/js/src/builtin/intl/CommonFunctions.js
@@ -14,35 +14,70 @@ function hasOwn(propName, object) {
}
/**
- * Holder object for encapsulating regexp instances.
- *
- * Regular expression instances should be created after the initialization of
- * self-hosted global.
- */
-var internalIntlRegExps = std_Object_create(null);
-internalIntlRegExps.unicodeLocaleExtensionSequenceRE = null;
-internalIntlRegExps.languageTagRE = null;
-internalIntlRegExps.duplicateVariantRE = null;
-internalIntlRegExps.duplicateSingletonRE = null;
-internalIntlRegExps.isWellFormedCurrencyCodeRE = null;
-internalIntlRegExps.currencyDigitsRE = null;
-
-/**
- * Regular expression matching a "Unicode locale extension sequence", which the
+ * Returns the start index of a "Unicode locale extension sequence", which the
* specification defines as: "any substring of a language tag that starts with
* a separator '-' and the singleton 'u' and includes the maximum sequence of
* following non-singleton subtags and their preceding '-' separators."
*
* Alternatively, this may be defined as: the components of a language tag that
- * match the extension production in RFC 5646, where the singleton component is
- * "u".
+ * match the `unicode_locale_extensions` production in UTS 35.
*
* Spec: ECMAScript Internationalization API Specification, 6.2.1.
*/
-function getUnicodeLocaleExtensionSequenceRE() {
- return internalIntlRegExps.unicodeLocaleExtensionSequenceRE ||
- (internalIntlRegExps.unicodeLocaleExtensionSequenceRE =
- RegExpCreate("-u(?:-[a-z0-9]{2,8})+"));
+function startOfUnicodeExtensions(locale) {
+ assert(typeof locale === "string", "locale is a string");
+
+ // Search for "-u-" marking the start of a Unicode extension sequence.
+ var start = callFunction(std_String_indexOf, locale, "-u-");
+ if (start < 0)
+ return -1;
+
+ // And search for "-x-" marking the start of any privateuse component to
+ // handle the case when "-u-" was only found within a privateuse subtag.
+ var privateExt = callFunction(std_String_indexOf, locale, "-x-");
+ if (privateExt >= 0 && privateExt < start)
+ return -1;
+
+ return start;
+}
+
+/**
+ * Returns the (exclusive) end index of the Unicode locale extension sequence starting at |start|.
+ */
+function endOfUnicodeExtensions(locale, start) {
+ assert(typeof locale === "string", "locale is a string");
+ assert(IsStructurallyValidLanguageTag(locale), "locale is a language tag");
+ assert(CanonicalizeLanguageTag(locale) === locale, "locale is a canonicalized language tag");
+ assert(0 <= start && start < locale.length, "start is an index into locale");
+ assert(Substring(locale, start, 3) === "-u-", "start points to Unicode extension sequence");
+
+ #define HYPHEN 0x2D
+ assert(std_String_fromCharCode(HYPHEN) === "-",
+ "code unit constant should match the expected character");
+
+ // Search for the start of the next singleton or privateuse subtag.
+ //
+ // Begin searching after the smallest possible Unicode locale extension
+ // sequence, namely |"-u-" 2alphanum|. End searching once the remaining
+ // characters can't fit the smallest possible singleton or privateuse
+ // subtag, namely |"-x-" alphanum|. Note the reduced end-limit means
+ // indexing inside the loop is always in-range.
+ for (var i = start + 5, end = locale.length - 4; i <= end; i++) {
+ if (callFunction(std_String_charCodeAt, locale, i) !== HYPHEN)
+ continue;
+ if (callFunction(std_String_charCodeAt, locale, i + 2) === HYPHEN)
+ return i;
+
+ // Skip over (i + 1) and (i + 2) because we've just verified they
+ // aren't "-", so the next possible delimiter can only be at (i + 3).
+ i += 2;
+ }
+
+ #undef HYPHEN
+
+ // If no singleton or privateuse subtag was found, the Unicode extension
+ // sequence extends until the end of the string.
+ return locale.length;
}
@@ -50,226 +85,602 @@ function getUnicodeLocaleExtensionSequenceRE() {
* Removes Unicode locale extension sequences from the given language tag.
*/
function removeUnicodeExtensions(locale) {
- // A wholly-privateuse locale has no extension sequences.
- if (callFunction(std_String_startsWith, locale, "x-"))
+ var start = startOfUnicodeExtensions(locale);
+ if (start < 0)
return locale;
- // Otherwise, split on "-x-" marking the start of any privateuse component.
- // Replace Unicode locale extension sequences in the left half, and return
- // the concatenation.
- var pos = callFunction(std_String_indexOf, locale, "-x-");
- if (pos < 0)
- pos = locale.length;
-
- var left = callFunction(String_substring, locale, 0, pos);
- var right = callFunction(String_substring, locale, pos);
-
- var extensions;
- var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE();
- while ((extensions = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, left)) !== null) {
- left = StringReplaceString(left, extensions[0], "");
- unicodeLocaleExtensionSequenceRE.lastIndex = 0;
- }
+ var end = endOfUnicodeExtensions(locale, start);
+ var left = Substring(locale, 0, start);
+ var right = Substring(locale, end, locale.length - end);
var combined = left + right;
- assert(IsStructurallyValidLanguageTag(combined), "recombination produced an invalid language tag");
- assert(function() {
- var uindex = callFunction(std_String_indexOf, combined, "-u-");
- if (uindex < 0)
- return true;
- var xindex = callFunction(std_String_indexOf, combined, "-x-");
- return xindex > 0 && xindex < uindex;
- }(), "recombination failed to remove all Unicode locale extension sequences");
+
+ assert(IsStructurallyValidLanguageTag(combined),
+ "recombination produced an invalid language tag");
+ assert(startOfUnicodeExtensions(combined) < 0,
+ "recombination failed to remove all Unicode locale extension sequences");
return combined;
}
-
/**
- * Regular expression defining BCP 47 language tags.
- *
- * Spec: RFC 5646 section 2.1.
+ * Returns the Unicode locale extension sequence of the given language tag, which must contain one.
*/
-function getLanguageTagRE() {
- if (internalIntlRegExps.languageTagRE)
- return internalIntlRegExps.languageTagRE;
-
- // RFC 5234 section B.1
- // ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
- var ALPHA = "[a-zA-Z]";
- // DIGIT = %x30-39
- // ; 0-9
- var DIGIT = "[0-9]";
-
- // RFC 5646 section 2.1
- // alphanum = (ALPHA / DIGIT) ; letters and numbers
- var alphanum = "(?:" + ALPHA + "|" + DIGIT + ")";
- // regular = "art-lojban" ; these tags match the 'langtag'
- // / "cel-gaulish" ; production, but their subtags
- // / "no-bok" ; are not extended language
- // / "no-nyn" ; or variant subtags: their meaning
- // / "zh-guoyu" ; is defined by their registration
- // / "zh-hakka" ; and all of these are deprecated
- // / "zh-min" ; in favor of a more modern
- // / "zh-min-nan" ; subtag or sequence of subtags
- // / "zh-xiang"
- var regular = "(?:art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)";
- // irregular = "en-GB-oed" ; irregular tags do not match
- // / "i-ami" ; the 'langtag' production and
- // / "i-bnn" ; would not otherwise be
- // / "i-default" ; considered 'well-formed'
- // / "i-enochian" ; These tags are all valid,
- // / "i-hak" ; but most are deprecated
- // / "i-klingon" ; in favor of more modern
- // / "i-lux" ; subtags or subtag
- // / "i-mingo" ; combination
- // / "i-navajo"
- // / "i-pwn"
- // / "i-tao"
- // / "i-tay"
- // / "i-tsu"
- // / "sgn-BE-FR"
- // / "sgn-BE-NL"
- // / "sgn-CH-DE"
- var irregular = "(?:en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)";
- // grandfathered = irregular ; non-redundant tags registered
- // / regular ; during the RFC 3066 era
- var grandfathered = "(?:" + irregular + "|" + regular + ")";
- // privateuse = "x" 1*("-" (1*8alphanum))
- var privateuse = "(?:x(?:-[a-z0-9]{1,8})+)";
- // singleton = DIGIT ; 0 - 9
- // / %x41-57 ; A - W
- // / %x59-5A ; Y - Z
- // / %x61-77 ; a - w
- // / %x79-7A ; y - z
- var singleton = "(?:" + DIGIT + "|[A-WY-Za-wy-z])";
- // extension = singleton 1*("-" (2*8alphanum))
- var extension = "(?:" + singleton + "(?:-" + alphanum + "{2,8})+)";
- // variant = 5*8alphanum ; registered variants
- // / (DIGIT 3alphanum)
- var variant = "(?:" + alphanum + "{5,8}|(?:" + DIGIT + alphanum + "{3}))";
- // region = 2ALPHA ; ISO 3166-1 code
- // / 3DIGIT ; UN M.49 code
- var region = "(?:" + ALPHA + "{2}|" + DIGIT + "{3})";
- // script = 4ALPHA ; ISO 15924 code
- var script = "(?:" + ALPHA + "{4})";
- // extlang = 3ALPHA ; selected ISO 639 codes
- // *2("-" 3ALPHA) ; permanently reserved
- var extlang = "(?:" + ALPHA + "{3}(?:-" + ALPHA + "{3}){0,2})";
- // language = 2*3ALPHA ; shortest ISO 639 code
- // ["-" extlang] ; sometimes followed by
- // ; extended language subtags
- // / 4ALPHA ; or reserved for future use
- // / 5*8ALPHA ; or registered language subtag
- var language = "(?:" + ALPHA + "{2,3}(?:-" + extlang + ")?|" + ALPHA + "{4}|" + ALPHA + "{5,8})";
- // langtag = language
- // ["-" script]
- // ["-" region]
- // *("-" variant)
- // *("-" extension)
- // ["-" privateuse]
- var langtag = language + "(?:-" + script + ")?(?:-" + region + ")?(?:-" +
- variant + ")*(?:-" + extension + ")*(?:-" + privateuse + ")?";
- // Language-Tag = langtag ; normal language tags
- // / privateuse ; private use tag
- // / grandfathered ; grandfathered tags
- var languageTag = "^(?:" + langtag + "|" + privateuse + "|" + grandfathered + ")$";
-
- // Language tags are case insensitive (RFC 5646 section 2.1.1).
- return (internalIntlRegExps.languageTagRE = RegExpCreate(languageTag, "i"));
+function getUnicodeExtensions(locale) {
+ var start = startOfUnicodeExtensions(locale);
+ assert(start >= 0, "start of Unicode extension sequence not found");
+ var end = endOfUnicodeExtensions(locale, start);
+
+ return Substring(locale, start, end - start);
}
+// The three possible token type bits. Expressed as #defines to avoid
+// extra named lookups in the interpreter/jits.
+#define NONE 0b00
+#define ALPHA 0b01
+#define DIGIT 0b10
+
+// Constants for code units used below.
+#define HYPHEN 0x2D
+#define DIGIT_ZERO 0x30
+#define DIGIT_NINE 0x39
+#define UPPER_A 0x41
+#define UPPER_Z 0x5A
+#define LOWER_A 0x61
+#define LOWER_T 0x74
+#define LOWER_U 0x75
+#define LOWER_X 0x78
+#define LOWER_Z 0x7A
+
+// The requirement to use callFunction() for method calls makes the parser
+// harder to read. Use macros to the rescue.
+
+// Reads the next token.
+#define NEXT_TOKEN_OR_RETURN_NULL(ts) \
+ if (!callFunction(ts.nextToken, ts)) \
+ return null;
+
+#define NEXT_TOKEN_OR_ASSERT(ts) \
+ if (!callFunction(ts.nextToken, ts)) \
+ assert(false, "unexpected invalid subtag");
-function getDuplicateVariantRE() {
- if (internalIntlRegExps.duplicateVariantRE)
- return internalIntlRegExps.duplicateVariantRE;
-
- // RFC 5234 section B.1
- // ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
- var ALPHA = "[a-zA-Z]";
- // DIGIT = %x30-39
- // ; 0-9
- var DIGIT = "[0-9]";
-
- // RFC 5646 section 2.1
- // alphanum = (ALPHA / DIGIT) ; letters and numbers
- var alphanum = "(?:" + ALPHA + "|" + DIGIT + ")";
- // variant = 5*8alphanum ; registered variants
- // / (DIGIT 3alphanum)
- var variant = "(?:" + alphanum + "{5,8}|(?:" + DIGIT + alphanum + "{3}))";
-
- // Match a langtag that contains a duplicate variant.
- var duplicateVariant =
- // Match everything in a langtag prior to any variants, and maybe some
- // of the variants as well (which makes this pattern inefficient but
- // not wrong, for our purposes);
- "(?:" + alphanum + "{2,8}-)+" +
- // a variant, parenthesised so that we can refer back to it later;
- "(" + variant + ")-" +
- // zero or more subtags at least two characters long (thus stopping
- // before extension and privateuse components);
- "(?:" + alphanum + "{2,8}-)*" +
- // and the same variant again
- "\\1" +
- // ...but not followed by any characters that would turn it into a
- // different subtag.
- "(?!" + alphanum + ")";
-
- // Language tags are case insensitive (RFC 5646 section 2.1.1). Using
- // character classes covering both upper- and lower-case characters nearly
- // addresses this -- but for the possibility of variant repetition with
- // differing case, e.g. "en-variant-Variant". Use a case-insensitive
- // regular expression to address this. (Note that there's no worry about
- // case transformation accepting invalid characters here: users have
- // already verified the string is alphanumeric Latin plus "-".)
- return (internalIntlRegExps.duplicateVariantRE = RegExpCreate(duplicateVariant, "i"));
+// Assigns the lower-cased current subtag to the target, then reads the next token; returns null if that fails.
+#define SUBTAG_VAR_OR_RETURN_NULL(ts, target) \
+ { \
+ target = Substring(ts.localeLowercase, ts.tokenStart, ts.tokenLength); \
+ NEXT_TOKEN_OR_RETURN_NULL(ts); \
+ }
+
+// Assigns the lower-cased current subtag to the target, then reads the next token; asserts if that fails.
+#define SUBTAG_VAR_OR_ASSERT(ts, target) \
+ { \
+ target = Substring(ts.localeLowercase, ts.tokenStart, ts.tokenLength); \
+ NEXT_TOKEN_OR_ASSERT(ts) \
+ }
+
+/**
+ * Tokenizer for Unicode BCP 47 locale identifiers.
+ */
+function BCP47TokenStream(locale) {
+ this.locale = locale;
+
+ // Locale identifiers are compared and processed case-insensitively, so
+ // technically it's not necessary to adjust case. But for easier processing,
+ // and because the canonical form for most subtags is lower case, we start
+ // with lower case for all.
+ //
+ // Note that the tokenizer function keeps using the original input string
+ // to properly detect non-ASCII characters. The lower-case string can't be
+ // used to detect those characters, because some non-ASCII characters
+ // lower-case map into ASCII characters, e.g. U+212A (KELVIN SIGN) lower-
+ // case maps to U+006B (LATIN SMALL LETTER K).
+ this.localeLowercase = callFunction(std_String_toLowerCase, locale);
+
+ // Current parse index in |locale|.
+ this.index = 0;
+
+ // The current token type, its start index, and its length.
+ this.token = NONE;
+ this.tokenStart = 0;
+ this.tokenLength = 0;
+
+ assert(std_String_fromCharCode(HYPHEN) === "-" &&
+ std_String_fromCharCode(DIGIT_ZERO) === "0" &&
+ std_String_fromCharCode(DIGIT_NINE) === "9" &&
+ std_String_fromCharCode(UPPER_A) === "A" &&
+ std_String_fromCharCode(UPPER_Z) === "Z" &&
+ std_String_fromCharCode(LOWER_A) === "a" &&
+ std_String_fromCharCode(LOWER_T) === "t" &&
+ std_String_fromCharCode(LOWER_U) === "u" &&
+ std_String_fromCharCode(LOWER_X) === "x" &&
+ std_String_fromCharCode(LOWER_Z) === "z",
+ "code unit constants should match the expected characters");
}
+MakeConstructible(BCP47TokenStream, {
+ __proto__: null,
+
+ // Reads the next token, returns |false| if an illegal character was found,
+ // otherwise returns |true|.
+ //
+ // eslint-disable-next-line object-shorthand
+ nextToken: function() {
+ var type = NONE;
+ var {index, locale} = this;
+ for (var i = index; i < locale.length; i++) {
+ // UTS 35, section 3.1.
+ // alpha = [A-Z a-z] ;
+ // digit = [0-9] ;
+ var c = callFunction(std_String_charCodeAt, locale, i);
+ if ((UPPER_A <= c && c <= UPPER_Z) || (LOWER_A <= c && c <= LOWER_Z))
+ type |= ALPHA;
+ else if (DIGIT_ZERO <= c && c <= DIGIT_NINE)
+ type |= DIGIT;
+ else if (c === HYPHEN && i > index && i + 1 < locale.length)
+ break;
+ else
+ return false;
+ }
+
+ this.token = type;
+ this.tokenStart = index;
+ this.tokenLength = i - index;
+ this.index = i + 1;
+ return true;
+ },
+
+ // Returns true if the character at the requested index within the current
+ // token is a digit.
+ //
+ // eslint-disable-next-line object-shorthand
+ isDigitAt: function(index) {
+ assert(0 <= index && index < this.tokenLength,
+ "must be an index into the current token");
+ var c = callFunction(std_String_charCodeAt, this.localeLowercase, this.tokenStart + index);
+ assert(!(c <= DIGIT_NINE) || c >= DIGIT_ZERO,
+ "token-start-code-unit <= '9' implies token-start-code-unit is in '0'..'9' " +
+ "and because all digits are sorted before any letters");
+ return c <= DIGIT_NINE;
+ },
+
+ // Returns the code unit of the first character at the current token
+ // position. Always returns the lower-case form of an alphabetical
+ // character.
+ //
+ // eslint-disable-next-line object-shorthand
+ singletonKey: function() {
+ assert(this.tokenLength === 1, "token is not a singleton");
+ var c = callFunction(std_String_charCodeAt, this.localeLowercase, this.tokenStart);
+ assert((DIGIT_ZERO <= c && c <= DIGIT_NINE) || (LOWER_A <= c && c <= LOWER_Z),
+ "unexpected code unit");
+ return c;
+ },
+
+ // eslint-disable-next-line object-shorthand
+ singletonValue: function() {
+ var singletonStart = this.tokenStart;
+ var min = callFunction(this.singletonKey, this) === LOWER_X ? 1 : 2;
+
+ NEXT_TOKEN_OR_RETURN_NULL(this);
+
+ // At least one non-singleton subtag must be present.
+ if (!(min <= this.tokenLength && this.tokenLength <= 8))
+ return null;
+ do {
+ NEXT_TOKEN_OR_RETURN_NULL(this);
+ } while (min <= this.tokenLength && this.tokenLength <= 8);
+
+ return callFunction(this.singletonValueAt, this, singletonStart);
+ },
+
+ // eslint-disable-next-line object-shorthand
+ singletonValueAt: function(start) {
+ // Singletons must be followed by a non-singleton subtag, "en-a-b" is not allowed.
+ var length = this.tokenStart - 1 - start;
+ if (length <= 2)
+ return null;
+ return Substring(this.localeLowercase, start, length);
+ }
+});
+
+/* eslint-disable complexity */
+/**
+ * Parser for Unicode BCP 47 locale identifiers.
+ *
+ * Returns null if |locale| can't be parsed as a `unicode_locale_id`. If the
+ * input is a grandfathered language tag, it is directly canonicalized to its
+ * modern form. The returned object has the following structure:
+ *
+ * {
+ * language: `unicode_language_subtag`,
+ * script: `unicode_script_subtag` / undefined,
+ * region: `unicode_region_subtag` / undefined,
+ * variants: array of `unicode_variant_subtag`,
+ * extensions: array of `extensions`,
+ * privateuse: `pu_extensions` / undefined,
+ * }
+ *
+ * All locale identifier subtags are returned in their normalized case:
+ *
+ * var langtag = parseLanguageTag("en-latn-us");
+ * assertEq("en", langtag.language);
+ * assertEq("Latn", langtag.script);
+ * assertEq("US", langtag.region);
+ *
+ * Spec: https://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers
+ */
+function parseLanguageTag(locale) {
+ assert(typeof locale === "string", "locale is a string");
+
+ // unicode_locale_id = unicode_language_id
+ // extensions*
+ // pu_extensions? ;
+ var ts = new BCP47TokenStream(locale);
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+ var language, script, region, privateuse;
+ var variants = [];
+ var extensions = [];
+
+ // unicode_language_id = unicode_language_subtag
+ // (sep unicode_script_subtag)?
+ // (sep unicode_region_subtag)?
+ // (sep unicode_variant_subtag)* ;
+ //
+ // sep = "-"
+ //
+ // Note: Unicode CLDR locale identifier backward compatibility extensions
+ // removed from `unicode_language_id`.
+
+ // unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
+ if (ts.token !== ALPHA || ts.tokenLength === 1 || ts.tokenLength === 4 || ts.tokenLength > 8) {
+ // Four character language subtags are not allowed in Unicode BCP 47
+ // locale identifiers. Also see the comparison to Unicode CLDR locale
+ // identifiers in <https://unicode.org/reports/tr35/#BCP_47_Conformance>.
+ return null;
+ }
+ assert((2 <= ts.tokenLength && ts.tokenLength <= 3) ||
+ (5 <= ts.tokenLength && ts.tokenLength <= 8),
+ "language subtags have 2-3 or 5-8 letters");
+
+ SUBTAG_VAR_OR_RETURN_NULL(ts, language);
+
+ // unicode_script_subtag = alpha{4} ;
+ if (ts.tokenLength === 4 && ts.token === ALPHA) {
+ SUBTAG_VAR_OR_RETURN_NULL(ts, script);
-function getDuplicateSingletonRE() {
- if (internalIntlRegExps.duplicateSingletonRE)
- return internalIntlRegExps.duplicateSingletonRE;
-
- // RFC 5234 section B.1
- // ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
- var ALPHA = "[a-zA-Z]";
- // DIGIT = %x30-39
- // ; 0-9
- var DIGIT = "[0-9]";
-
- // RFC 5646 section 2.1
- // alphanum = (ALPHA / DIGIT) ; letters and numbers
- var alphanum = "(?:" + ALPHA + "|" + DIGIT + ")";
- // singleton = DIGIT ; 0 - 9
- // / %x41-57 ; A - W
- // / %x59-5A ; Y - Z
- // / %x61-77 ; a - w
- // / %x79-7A ; y - z
- var singleton = "(?:" + DIGIT + "|[A-WY-Za-wy-z])";
-
- // Match a langtag that contains a duplicate singleton.
- var duplicateSingleton =
- // Match a singleton subtag, parenthesised so that we can refer back to
- // it later;
- "-(" + singleton + ")-" +
- // then zero or more subtags;
- "(?:" + alphanum + "+-)*" +
- // and the same singleton again
- "\\1" +
- // ...but not followed by any characters that would turn it into a
- // different subtag.
- "(?!" + alphanum + ")";
-
- // Language tags are case insensitive (RFC 5646 section 2.1.1). Using
- // character classes covering both upper- and lower-case characters nearly
- // addresses this -- but for the possibility of singleton repetition with
- // differing case, e.g. "en-u-foo-U-foo". Use a case-insensitive regular
- // expression to address this. (Note that there's no worry about case
- // transformation accepting invalid characters here: users have already
- // verified the string is alphanumeric Latin plus "-".)
- return (internalIntlRegExps.duplicateSingletonRE = RegExpCreate(duplicateSingleton, "i"));
+ // The first character of a script code needs to be capitalized.
+ // "hans" -> "Hans"
+ script = callFunction(std_String_toUpperCase, script[0]) +
+ Substring(script, 1, script.length - 1);
+ }
+
+ // unicode_region_subtag = (alpha{2} | digit{3}) ;
+ if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
+ (ts.tokenLength === 3 && ts.token === DIGIT))
+ {
+ SUBTAG_VAR_OR_RETURN_NULL(ts, region);
+
+ // Region codes need to be in upper-case. "bu" -> "BU"
+ region = callFunction(std_String_toUpperCase, region);
+ }
+
+ // unicode_variant_subtag = (alphanum{5,8}
+ // | digit alphanum{3}) ;
+ //
+ // alphanum = [0-9 A-Z a-z] ;
+ while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
+ (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
+ {
+ // Locale identifiers are case insensitive (UTS 35, section 3.2).
+ // All seen variants are compared ignoring case differences by
+ // using the lower-case form. This allows us to properly detect and
+ // reject variant repetitions with differing case, e.g.
+ // "en-variant-Variant".
+ var variant;
+ SUBTAG_VAR_OR_RETURN_NULL(ts, variant);
+
+ // Reject the Locale identifier if a duplicate variant was found.
+ //
+ // This linear-time verification step means the whole variant
+ // subtag checking is potentially quadratic, but we're okay doing
+ // that because language tags are unlikely to be deliberately
+ // pathological.
+ if (callFunction(ArrayIndexOf, variants, variant) !== -1)
+ return null;
+ _DefineDataProperty(variants, variants.length, variant);
+ }
+
+ // extensions = unicode_locale_extensions
+ // | transformed_extensions
+ // | other_extensions ;
+ //
+ // unicode_locale_extensions = sep [uU]
+ // ((sep keyword)+
+ // |(sep attribute)+ (sep keyword)*) ;
+ //
+ // transformed_extensions = sep [tT]
+ // ((sep tlang (sep tfield)*)
+ // |(sep tfield)+) ;
+ //
+ // other_extensions = [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
+ //
+ // keyword = key (sep type)? ;
+ //
+ // key = alphanum alpha ;
+ //
+ // type = alphanum{3,8} (sep alphanum{3,8})* ;
+ //
+ // attribute = alphanum{3,8} ;
+ //
+ // tlang = unicode_language_subtag
+ // (sep unicode_script_subtag)?
+ // (sep unicode_region_subtag)?
+ // (sep unicode_variant_subtag)* ;
+ //
+ // tfield = tkey tvalue;
+ //
+ // tkey = alpha digit ;
+ //
+ // tvalue = (sep alphanum{3,8})+ ;
+ var seenSingletons = [];
+ while (ts.tokenLength === 1) {
+ var singleton = callFunction(ts.singletonKey, ts);
+ if (singleton === LOWER_X)
+ break;
+
+ // Locale identifiers are case insensitive (UTS 35, section 3.2).
+ // Ensure |singletonKey()| does not return the code unit of an
+ // upper-case character, so we can properly detect and reject
+ // singletons with different case, e.g. "en-u-foo-U-foo".
+ assert(!(UPPER_A <= singleton && singleton <= UPPER_Z),
+ "unexpected upper-case code unit");
+
+ // Reject the input if a duplicate singleton was found.
+ //
+ // Similar to the variant validation step this check is O(n**2),
+ // but given that there are only 35 possible singletons the
+ // quadratic runtime is negligible.
+ if (callFunction(ArrayIndexOf, seenSingletons, singleton) !== -1)
+ return null;
+ _DefineDataProperty(seenSingletons, seenSingletons.length, singleton);
+
+ var extension;
+ if (singleton === LOWER_U) {
+ var extensionStart = ts.tokenStart;
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+ while (2 <= ts.tokenLength && ts.tokenLength <= 8) {
+ // `key` doesn't allow a digit as its second character.
+ if (ts.tokenLength === 2 && callFunction(ts.isDigitAt, ts, 1))
+ return null;
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+ }
+ extension = callFunction(ts.singletonValueAt, ts, extensionStart);
+ } else if (singleton === LOWER_T) {
+ var extensionStart = ts.tokenStart;
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+ // `tfield` starts with `tkey`, which in turn is `alpha digit`, so
+ // an alpha-only token must be a `tlang`.
+ if (ts.token === ALPHA) {
+ // `unicode_language_subtag`
+ if (ts.tokenLength === 1 || ts.tokenLength === 4 || ts.tokenLength > 8)
+ return null;
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+ // `unicode_script_subtag` (optional)
+ if (ts.tokenLength === 4 && ts.token === ALPHA) {
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+ }
+
+ // `unicode_region_subtag` (optional)
+ if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
+ (ts.tokenLength === 3 && ts.token === DIGIT))
+ {
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+ }
+
+ // `unicode_variant_subtag` (optional)
+ while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
+ (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
+ {
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+ }
+ }
+
+ // Trailing `tfield` subtags.
+ while (ts.tokenLength === 2) {
+ // `tkey` is `alpha digit`.
+ if (callFunction(ts.isDigitAt, ts, 0) ||
+ !callFunction(ts.isDigitAt, ts, 1))
+ {
+ return null;
+ }
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+ // `tfield` requires at least one `tvalue`.
+ if (!(3 <= ts.tokenLength && ts.tokenLength <= 8))
+ return null;
+ do {
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+ } while (3 <= ts.tokenLength && ts.tokenLength <= 8);
+ }
+ extension = callFunction(ts.singletonValueAt, ts, extensionStart);
+ } else {
+ extension = callFunction(ts.singletonValue, ts);
+ }
+ if (!extension)
+ return null;
+
+ _DefineDataProperty(extensions, extensions.length, extension);
+ }
+
+ // Trailing pu_extensions component of the unicode_locale_id production.
+ //
+ // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
+ if (ts.tokenLength === 1 && callFunction(ts.singletonKey, ts) === LOWER_X) {
+ privateuse = callFunction(ts.singletonValue, ts);
+ if (!privateuse)
+ return null;
+ }
+
+ // Reject the input if it couldn't be parsed completely.
+ if (ts.token !== NONE)
+ return null;
+
+ var tagObj = {
+ language,
+ script,
+ region,
+ variants,
+ extensions,
+ privateuse,
+ };
+
+ // Handle grandfathered tags right away, so we don't need to have extra
+ // paths for grandfathered tags later on.
+ //
+ // grandfathered = "art-lojban" ; non-redundant tags registered
+ // / "cel-gaulish" ; during the RFC 3066 era
+ // / "zh-guoyu" ; these tags match the 'langtag'
+ // / "zh-hakka" ; production, but their subtags
+ // / "zh-xiang" ; are not extended language
+ // ; or variant subtags: their meaning
+ // ; is defined by their registration
+ // ; and all of these are deprecated
+ // ; in favor of a more modern
+ // ; subtag or sequence of subtags
+ if (hasOwn(ts.localeLowercase, grandfatheredMappings))
+ updateGrandfatheredMappings(tagObj);
+
+ // Return if the complete input was successfully parsed.
+ return tagObj;
}
+/**
+ * Return the locale and fields components of the given valid Transform
+ * extension subtag.
+ */
+function TransformExtensionComponents(extension) {
+ assert(typeof extension === "string", "extension is a String value");
+ assert(callFunction(std_String_startsWith, extension, "t-"),
+ "extension starts with 't-'");
+
+ var ts = new BCP47TokenStream(Substring(extension, 2, extension.length - 2));
+ NEXT_TOKEN_OR_ASSERT(ts);
+
+ // `tfield` starts with `tkey`, which in turn is `alpha digit`, so
+ // an alpha-only token must be a `tlang`.
+ var localeObj;
+ if (ts.token === ALPHA) {
+ // `unicode_language_subtag`
+ assert((2 <= ts.tokenLength && ts.tokenLength <= 3) ||
+ (5 <= ts.tokenLength && ts.tokenLength <= 8),
+ "language subtags have 2-3 or 5-8 letters");
+
+ var language;
+ SUBTAG_VAR_OR_ASSERT(ts, language);
+
+ // unicode_script_subtag = alpha{4} ;
+ var script;
+ if (ts.tokenLength === 4 && ts.token === ALPHA) {
+ SUBTAG_VAR_OR_ASSERT(ts, script);
+
+ // The first character of a script code needs to be capitalized.
+ // "hans" -> "Hans"
+ script = callFunction(std_String_toUpperCase, script[0]) +
+ Substring(script, 1, script.length - 1);
+ }
+
+ // unicode_region_subtag = (alpha{2} | digit{3}) ;
+ var region;
+ if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
+ (ts.tokenLength === 3 && ts.token === DIGIT))
+ {
+ SUBTAG_VAR_OR_ASSERT(ts, region);
+
+ // Region codes need to be in upper-case. "bu" -> "BU"
+ region = callFunction(std_String_toUpperCase, region);
+ }
+
+ // unicode_variant_subtag = (alphanum{5,8}
+ // | digit alphanum{3}) ;
+ //
+ // alphanum = [0-9 A-Z a-z] ;
+ var variants = [];
+ while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
+ (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
+ {
+ var variant;
+ SUBTAG_VAR_OR_ASSERT(ts, variant);
+
+ _DefineDataProperty(variants, variants.length, variant);
+ }
+
+ localeObj = {
+ language,
+ script,
+ region,
+ variants,
+ extensions: [],
+ privateuse: undefined,
+ };
+ }
+
+ // Trailing `tfield` subtags. (Any other trailing subtags are an error,
+ // because we're guaranteed to only see a valid transform extension here.)
+ var fields = [];
+ while (ts.tokenLength === 2) {
+ // `tkey` is `alpha digit`.
+ assert(!callFunction(ts.isDigitAt, ts, 0) && callFunction(ts.isDigitAt, ts, 1),
+ "unexpected invalid tkey subtag");
+
+ var key;
+ SUBTAG_VAR_OR_ASSERT(ts, key);
+
+ // `tfield` requires at least one `tvalue`.
+ assert(3 <= ts.tokenLength && ts.tokenLength <= 8,
+ "unexpected invalid tvalue subtag");
+
+ var value;
+ SUBTAG_VAR_OR_ASSERT(ts, value);
+
+ while (3 <= ts.tokenLength && ts.tokenLength <= 8) {
+ var part;
+ SUBTAG_VAR_OR_ASSERT(ts, part);
+ value += "-" + part;
+ }
+
+ _DefineDataProperty(fields, fields.length, {key, value});
+ }
+
+ assert(ts.token === NONE,
+ "unexpected trailing characters in promised-to-be-valid transform extension");
+
+ return {locale: localeObj, fields};
+}
+/* eslint-enable complexity */
+
+#undef NONE
+#undef ALPHA
+#undef DIGIT
+
+#undef HYPHEN
+#undef DIGIT_ZERO
+#undef DIGIT_NINE
+#undef UPPER_A
+#undef UPPER_Z
+#undef LOWER_A
+#undef LOWER_T
+#undef LOWER_U
+#undef LOWER_X
+#undef LOWER_Z
+
+#undef SUBTAG_VAR_OR_ASSERT
+#undef SUBTAG_VAR_OR_RETURN_NULL
+#undef NEXT_TOKEN_OR_ASSERT
+#undef NEXT_TOKEN_OR_RETURN_NULL
/**
* Verifies that the given string is a well-formed BCP 47 language tag
@@ -278,53 +689,369 @@ function getDuplicateSingletonRE() {
* Spec: ECMAScript Internationalization API Specification, 6.2.2.
*/
function IsStructurallyValidLanguageTag(locale) {
- assert(typeof locale === "string", "IsStructurallyValidLanguageTag");
- var languageTagRE = getLanguageTagRE();
- if (!regexp_test_no_statics(languageTagRE, locale))
- return false;
-
- // Before checking for duplicate variant or singleton subtags with
- // regular expressions, we have to get private use subtag sequences
- // out of the picture.
- if (callFunction(std_String_startsWith, locale, "x-"))
- return true;
- var pos = callFunction(std_String_indexOf, locale, "-x-");
- if (pos !== -1)
- locale = callFunction(String_substring, locale, 0, pos);
-
- // Check for duplicate variant or singleton subtags.
- var duplicateVariantRE = getDuplicateVariantRE();
- var duplicateSingletonRE = getDuplicateSingletonRE();
- return !regexp_test_no_statics(duplicateVariantRE, locale) &&
- !regexp_test_no_statics(duplicateSingletonRE, locale);
+ return parseLanguageTag(locale) !== null;
}
/**
- * Joins the array elements in the given range with the supplied separator.
+ * Canonicalizes the given structurally valid Unicode BCP 47 locale identifier,
+ * including regularized case of subtags. For example, the language tag
+ * Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
+ *
+ * Zh ; 2*3ALPHA
+ * -haNS ; ["-" script]
+ * -bu ; ["-" region]
+ * -variant2 ; *("-" variant)
+ * -Variant1
+ * -u-ca-chinese ; *("-" extension)
+ * -t-Zh-laTN
+ * -x-PRIVATE ; ["-" privateuse]
+ *
+ * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
+ *
+ * UTS 35 specifies two different canonicalization algorithms. There's one to
+ * canonicalize BCP 47 language tags and another one to canonicalize Unicode
+ * locale identifiers. The latter one wasn't present when ECMA-402 was changed
+ * to use Unicode BCP 47 locale identifiers instead of BCP 47 language tags, so
+ * ECMA-402 currently only uses the former to canonicalize Unicode BCP 47 locale
+ * identifiers.
+ *
+ * Spec: ECMAScript Internationalization API Specification, 6.2.3.
+ * Spec: https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
+ * Spec: https://unicode.org/reports/tr35/#BCP_47_Language_Tag_Conversion
*/
-function ArrayJoinRange(array, separator, from, to = array.length) {
- assert(typeof separator === "string", "|separator| is a string value");
- assert(typeof from === "number", "|from| is a number value");
- assert(typeof to === "number", "|to| is a number value");
- assert(0 <= from && from <= to && to <= array.length, "|from| and |to| form a valid range");
+function CanonicalizeLanguageTagObject(localeObj) {
+ assert(IsObject(localeObj), "CanonicalizeLanguageTagObject");
- if (from === to)
- return "";
+ // Per UTS 35, 3.3.1, the very first step is to canonicalize the syntax by
+ // normalizing the case and ordering all subtags. The canonical syntax form
+ // itself is specified in UTS 35, 3.2.1.
+
+ // The parser already normalized the case for all subtags.
- var result = array[from];
- for (var i = from + 1; i < to; i++) {
- result += separator + array[i];
+#ifdef DEBUG
+ function IsLowerCase(s) {
+ return s === callFunction(std_String_toLowerCase, s);
}
- return result;
+ function IsUpperCase(s) {
+ return s === callFunction(std_String_toUpperCase, s);
+ }
+ function IsTitleCase(s) {
+ assert(s.length > 0, "unexpected empy string");
+ var r = callFunction(std_String_toUpperCase, s[0]) +
+ callFunction(std_String_toLowerCase, Substring(s, 1, s.length - 1));
+ return s === r;
+ }
+#endif
+
+ // 1. Any script subtag is in title case.
+ assert(localeObj.script === undefined || IsTitleCase(localeObj.script),
+ "If present, script subtag is in title case");
+
+ // 2. Any region subtag is in uppercase.
+ assert(localeObj.region === undefined || IsUpperCase(localeObj.region),
+ "If present, region subtag is in upper case");
+
+ // 3. All other subtags are in lowercase.
+ assert(IsLowerCase(localeObj.language),
+ "language subtag is in lower case");
+ assert(callFunction(ArrayEvery, localeObj.variants, IsLowerCase),
+ "variant subtags are in lower case");
+ assert(callFunction(ArrayEvery, localeObj.extensions, IsLowerCase),
+ "extension subtags are in lower case");
+ assert(localeObj.privateuse === undefined || IsLowerCase(localeObj.privateuse),
+ "If present, privateuse subtag is in lower case");
+
+
+ // The second step in UTS 35, 3.2.1, is to order all subtags.
+
+ // 1. Any variants are in alphabetical order.
+ var variants = localeObj.variants;
+ if (variants.length > 0) {
+ callFunction(ArraySort, variants);
+ }
+
+ // 2. Any extensions are in alphabetical order by their singleton.
+ var extensions = localeObj.extensions;
+ if (extensions.length > 0) {
+ // Extension sequences are sorted by their singleton characters.
+ // "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
+ callFunction(ArraySort, extensions);
+
+ // The last three bullet points in UTS 35, 3.2.1 apply only to Unicode and Transform
+ // extensions.
+ //
+ // 3. All attributes are sorted in alphabetical order.
+ //
+ // 4. All keywords and tfields are sorted by alphabetical order of their
+ // keys, within their respective extensions.
+ //
+ // 5. Any type or tfield value "true" is removed.
+
+ for (var i = 0; i < extensions.length; i++) {
+ var ext = extensions[i];
+ assert(IsLowerCase(ext),
+ "extension subtags must be in lower-case");
+ assert(ext[1] === "-",
+ "extension subtags start with a singleton");
+
+ // Canonicalize Unicode locale extension subtag if present.
+ if (ext[0] === "u") {
+ var {attributes, keywords} = UnicodeExtensionComponents(ext);
+ extensions[i] = CanonicalizeUnicodeExtension(attributes, keywords);
+ }
+
+ // Canonicalize Unicode BCP 47 T extension if present.
+ if (ext[0] === "t") {
+ var {locale, fields} = TransformExtensionComponents(ext);
+ extensions[i] = CanonicalizeTransformExtension(locale, fields);
+ }
+ }
+ }
+
+ // The next two steps in 3.3.1 replace deprecated language and region
+ // subtags with their preferred mappings.
+ updateLocaleIdMappings(localeObj);
+
+ // The two final steps in 3.3.1, handling irregular grandfathered and
+ // private-use only language tags, don't apply, because these two forms
+ // can't occur in Unicode BCP 47 locale identifiers.
+}
+
+/**
+ * Intl.Locale proposal
+ *
+ * UnicodeExtensionComponents( extension )
+ *
+ * Returns the components of |extension| where |extension| is a "Unicode locale
+ * extension sequence" (ECMA-402, 6.2.1) without the starting separator
+ * character.
+ *
+ * The result is an object `{attributes, keywords}`:
+ * - |attributes| lists, in input order, every subtag that precedes the first
+ * two-character (key) subtag.
+ * - |keywords| lists, in input order, one `{key, value}` record per
+ * two-character (key) subtag. |value| is the "-"-joined sequence of the
+ * subtags that follow the key up to the next key, or the empty string when
+ * no such subtags exist.
+ *
+ * Neither list is sorted or de-duplicated by this function.
+ */
+function UnicodeExtensionComponents(extension) {
+ assert(typeof extension === "string", "extension is a String value");
+
+ // Step 1.
+ var attributes = [];
+
+ // Step 2.
+ var keywords = [];
+
+ // Step 3.
+ var isKeyword = false;
+
+ // Step 4.
+ var size = extension.length;
+
+ // Step 5.
+ // |extension| starts with "u-" instead of "-u-" in our implementation, so
+ // we need to initialize |k| with 2 instead of 3.
+ assert(callFunction(std_String_startsWith, extension, "u-"),
+ "extension starts with 'u-'");
+ var k = 2;
+
+ // Step 6.
+ var key, value;
+ while (k < size) {
+ // Step 6.a.
+ var e = callFunction(std_String_indexOf, extension, "-", k);
+
+ // Step 6.b.
+ var len = (e < 0 ? size : e) - k;
+
+ // Step 6.c.
+ var subtag = Substring(extension, k, len);
+
+ // Steps 6.d-e.
+ if (!isKeyword) {
+ // Step 6.d.
+ // NB: Duplicates are handled elsewhere in our implementation.
+ if (len !== 2)
+ _DefineDataProperty(attributes, attributes.length, subtag);
+ } else {
+ // Steps 6.e.i-ii.
+ if (len === 2) {
+ // Step 6.e.i.1.
+ // NB: Duplicates are handled elsewhere in our implementation.
+ _DefineDataProperty(keywords, keywords.length, {key, value});
+ } else {
+ // Step 6.e.ii.1.
+ if (value !== "")
+ value += "-";
+
+ // Step 6.e.ii.2.
+ value += subtag;
+ }
+ }
+
+ // Step 6.f.
+ if (len === 2) {
+ // Step 6.f.i.
+ isKeyword = true;
+
+ // Step 6.f.ii.
+ key = subtag;
+
+ // Step 6.f.iii.
+ value = "";
+ }
+
+ // Step 6.g.
+ k += len + 1;
+ }
+
+ // Step 7.
+ if (isKeyword) {
+ // Step 7.a.
+ // NB: Duplicates are handled elsewhere in our implementation.
+ _DefineDataProperty(keywords, keywords.length, {key, value});
+ }
+
+ // Step 8.
+ return {attributes, keywords};
+}
+
+/**
+ * CanonicalizeUnicodeExtension( attributes, keywords )
+ *
+ * Canonical syntax per <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
+ *
+ * - All attributes and keywords are in lowercase.
+ * - Note: The parser already converted keywords to lowercase.
+ * - All attributes are sorted in alphabetical order.
+ * - All keywords are sorted by alphabetical order of their keys.
+ * - Any type value "true" is removed.
+ *
+ * Canonical form:
+ * - All keys and types use the canonical form (from the name attribute;
+ * see Section 3.6.4 U Extension Data Files).
+ *
+ * |attributes| is a list of attribute subtag strings and |keywords| is a list
+ * of `{key, value}` records with two-character keys. Each list is handed to
+ * ArraySort when it holds more than one element (presumably sorting it in
+ * place; the result of the call is not used).
+ *
+ * Returns the extension as a string that starts with "u", followed by every
+ * attribute and then every keyword, each preceded by a "-" separator. For a
+ * keyword whose value is the empty string or "true" only the key is emitted.
+ */
+function CanonicalizeUnicodeExtension(attributes, keywords) {
+ assert(attributes.length > 0 || keywords.length > 0,
+ "unexpected empty Unicode locale extension components");
+
+ // All attributes are sorted in alphabetical order.
+ if (attributes.length > 1)
+ callFunction(ArraySort, attributes);
+
+ // All keywords are sorted by alphabetical order of keys.
+ if (keywords.length > 1) {
+ function UnicodeKeySort(left, right) {
+ var leftKey = left.key;
+ var rightKey = right.key;
+ assert(leftKey.length === 2, "left key is a Unicode key");
+ assert(rightKey.length === 2, "right key is a Unicode key");
+
+ // Compare both strings using charCodeAt(), because relational
+ // string comparison always calls into the VM, whereas charCodeAt
+ // can be inlined by Ion.
+ var diff = callFunction(std_String_charCodeAt, leftKey, 0) -
+ callFunction(std_String_charCodeAt, rightKey, 0);
+ if (diff === 0) {
+ diff = callFunction(std_String_charCodeAt, leftKey, 1) -
+ callFunction(std_String_charCodeAt, rightKey, 1);
+ }
+ return diff;
+ }
+
+ callFunction(ArraySort, keywords, UnicodeKeySort);
+ }
+
+ var extension = "u";
+
+ // Append all attributes.
+ for (var i = 0; i < attributes.length; i++) {
+ extension += "-" + attributes[i];
+ }
+
+ // Append all keywords.
+ for (var i = 0; i < keywords.length; i++) {
+ var {key, value} = keywords[i];
+ extension += "-" + key;
+
+ // Type value "true" is removed.
+ if (value !== "" && value !== "true")
+ extension += "-" + value;
+ }
+
+ return extension;
+}
+
+/**
+ * CanonicalizeTransformExtension
+ *
+ * Canonical form per <https://unicode.org/reports/tr35/#BCP47_T_Extension>:
+ *
+ * - These subtags are all in lowercase (that is the canonical casing for these
+ * subtags), [...].
+ *
+ * And per <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
+ *
+ * - All keywords and tfields are sorted by alphabetical order of their keys,
+ * within their respective extensions.
+ *
+ * |localeObj| is the language tag object for the `tlang` part of the
+ * extension, or undefined when the extension has none. |fields| is a list of
+ * `{key, value}` records with two-character keys; a non-empty list is handed
+ * to ArraySort together with a comparator that orders records by key.
+ *
+ * Returns the extension as a string that starts with "t", followed by the
+ * lower-cased string form of |localeObj| (when present) and then every field
+ * as "-" key "-" value.
+ */
+function CanonicalizeTransformExtension(localeObj, fields) {
+ assert(localeObj !== undefined || fields.length > 0,
+ "unexpected empty Transform locale extension components");
+
+ if (fields.length > 0) {
+ function TransformKeySort(left, right) {
+ var leftKey = left.key;
+ var rightKey = right.key;
+ assert(leftKey.length === 2, "left key is a Transform key");
+ assert(rightKey.length === 2, "right key is a Transform key");
+
+ // Compare both strings using charCodeAt(), because relational
+ // string comparison always calls into the VM, whereas charCodeAt
+ // can be inlined by Ion.
+ var diff = callFunction(std_String_charCodeAt, leftKey, 0) -
+ callFunction(std_String_charCodeAt, rightKey, 0);
+ if (diff === 0) {
+ diff = callFunction(std_String_charCodeAt, leftKey, 1) -
+ callFunction(std_String_charCodeAt, rightKey, 1);
+ }
+ return diff;
+ }
+
+ callFunction(ArraySort, fields, TransformKeySort);
+ }
+
+ var extension = "t";
+
+ // Append the language subtag if present.
+ if (localeObj !== undefined) {
+ // [1] is a bit unclear whether or not the `tlang` subtag also needs
+ // to be canonicalized (and case-adjusted). For now simply append it as
+ // is and change it to all lower-case. If we switch to [2], the `tlang`
+ // subtag also needs to be canonicalized according to the same rules as
+ // `unicode_language_id` subtags are canonicalized. Also see [3].
+ //
+ // [1] https://unicode.org/reports/tr35/#Language_Tag_to_Locale_Identifier
+ // [2] https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
+ // [3] https://github.com/tc39/ecma402/issues/330
+ var localeStr = StringFromLanguageTagObject(localeObj);
+ extension += "-" + callFunction(std_String_toLowerCase, localeStr);
+ }
+
+ // Append all fields.
+ for (var i = 0; i < fields.length; i++) {
+ // UTS 35, 3.2.1 specifies:
+ // - Any type or tfield value "true" is removed.
+ //
+ // But the `tvalue` subtag is mandatory in `tfield: tkey tvalue`, so
+ // ignore this apparently invalid part of the UTS 35 specification and
+ // simply append all `tfield` subtags.
+ var {key, value} = fields[i];
+ extension += "-" + key + "-" + value;
+ }
+
+ return extension;
}
/**
* Canonicalizes the given structurally valid BCP 47 language tag, including
* regularized case of subtags. For example, the language tag
- * Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
+ * Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
*
* Zh ; 2*3ALPHA
- * -NAN ; ["-" extlang]
* -haNS ; ["-" script]
* -bu ; ["-" region]
* -variant2 ; *("-" variant)
@@ -333,120 +1060,54 @@ function ArrayJoinRange(array, separator, from, to = array.length) {
* -t-Zh-laTN
* -x-PRIVATE ; ["-" privateuse]
*
- * becomes nan-Hans-mm-variant2-variant1-t-zh-latn-u-ca-chinese-x-private
+ * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
*
* Spec: ECMAScript Internationalization API Specification, 6.2.3.
- * Spec: RFC 5646, section 4.5.
*/
function CanonicalizeLanguageTag(locale) {
- assert(IsStructurallyValidLanguageTag(locale), "CanonicalizeLanguageTag");
+ var localeObj = parseLanguageTag(locale);
+ assert(localeObj !== null, "CanonicalizeLanguageTag");
- // The input
- // "Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE"
- // will be used throughout this method to illustrate how it works.
+ CanonicalizeLanguageTagObject(localeObj);
- // Language tags are compared and processed case-insensitively, so
- // technically it's not necessary to adjust case. But for easier processing,
- // and because the canonical form for most subtags is lower case, we start
- // with lower case for all.
- // "Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE" ->
- // "zh-nan-hans-bu-variant2-variant1-u-ca-chinese-t-zh-latn-x-private"
- locale = callFunction(std_String_toLowerCase, locale);
-
- // Handle mappings for complete tags.
- if (hasOwn(locale, langTagMappings))
- return langTagMappings[locale];
-
- var subtags = StringSplitString(ToString(locale), "-");
- var i = 0;
-
- // Handle the standard part: All subtags before the first singleton or "x".
- // "zh-nan-hans-bu-variant2-variant1"
- while (i < subtags.length) {
- var subtag = subtags[i];
-
- // If we reach the start of an extension sequence or private use part,
- // we're done with this loop. We have to check for i > 0 because for
- // irregular language tags, such as i-klingon, the single-character
- // subtag "i" is not the start of an extension sequence.
- // In the example, we break at "u".
- if (subtag.length === 1 && (i > 0 || subtag === "x"))
- break;
+ return StringFromLanguageTagObject(localeObj);
+}
- if (i !== 0) {
- if (subtag.length === 4) {
- // 4-character subtags that are not in initial position are
- // script codes; their first character needs to be capitalized.
- // "hans" -> "Hans"
- subtag = callFunction(std_String_toUpperCase, subtag[0]) +
- callFunction(String_substring, subtag, 1);
- } else if (subtag.length === 2) {
- // 2-character subtags that are not in initial position are
- // region codes; they need to be upper case. "bu" -> "BU"
- subtag = callFunction(std_String_toUpperCase, subtag);
- }
- }
- if (hasOwn(subtag, langSubtagMappings)) {
- // Replace deprecated subtags with their preferred values.
- // "BU" -> "MM"
- // This has to come after we capitalize region codes because
- // otherwise some language and region codes could be confused.
- // For example, "in" is an obsolete language code for Indonesian,
- // but "IN" is the country code for India.
- // Note that the script generating langSubtagMappings makes sure
- // that no regular subtag mapping will replace an extlang code.
- subtag = langSubtagMappings[subtag];
- } else if (hasOwn(subtag, extlangMappings)) {
- // Replace deprecated extlang subtags with their preferred values,
- // and remove the preceding subtag if it's a redundant prefix.
- // "zh-nan" -> "nan"
- // Note that the script generating extlangMappings makes sure that
- // no extlang mapping will replace a normal language code.
- subtag = extlangMappings[subtag].preferred;
- if (i === 1 && extlangMappings[subtag].prefix === subtags[0]) {
- callFunction(std_Array_shift, subtags);
- i--;
- }
- }
- subtags[i] = subtag;
- i++;
- }
- var normal = ArrayJoinRange(subtags, "-", 0, i);
-
- // Extension sequences are sorted by their singleton characters.
- // "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
- var extensions = new List();
- while (i < subtags.length && subtags[i] !== "x") {
- var extensionStart = i;
- i++;
- while (i < subtags.length && subtags[i].length > 1)
- i++;
- var extension = ArrayJoinRange(subtags, "-", extensionStart, i);
- callFunction(std_Array_push, extensions, extension);
- }
- callFunction(std_Array_sort, extensions);
+/**
+ * Returns the string representation of the given language tag object.
+ */
+function StringFromLanguageTagObject(localeObj) {
+ assert(IsObject(localeObj), "StringFromLanguageTagObject");
+
+ var {
+ language,
+ script,
+ region,
+ variants,
+ extensions,
+ privateuse,
+ } = localeObj;
- // Private use sequences are left as is. "x-private"
- var privateUse = "";
- if (i < subtags.length)
- privateUse = ArrayJoinRange(subtags, "-", i);
+ var canonical = language;
+
+ if (script !== undefined)
+ canonical += "-" + script;
+
+ if (region !== undefined)
+ canonical += "-" + region;
+
+ if (variants.length > 0)
+ canonical += "-" + callFunction(std_Array_join, variants, "-");
- // Put everything back together.
- var canonical = normal;
if (extensions.length > 0)
canonical += "-" + callFunction(std_Array_join, extensions, "-");
- if (privateUse.length > 0) {
- // Be careful of a Language-Tag that is entirely privateuse.
- if (canonical.length > 0)
- canonical += "-" + privateUse;
- else
- canonical = privateUse;
- }
+
+ if (privateuse !== undefined)
+ canonical += "-" + privateuse;
return canonical;
}
-
/**
* Returns true if the input contains only ASCII alphabetical characters.
*/
@@ -469,13 +1130,11 @@ function ValidateAndCanonicalizeLanguageTag(locale) {
assert(typeof locale === "string", "ValidateAndCanonicalizeLanguageTag");
// Handle the common case (a standalone language) first.
- // Only the following BCP47 subset is accepted:
- // Language-Tag = langtag
- // langtag = language
- // language = 2*3ALPHA ; shortest ISO 639 code
- // For three character long strings we need to make sure it's not a
- // private use only language tag, for example "x-x".
- if (locale.length === 2 || (locale.length === 3 && locale[1] !== "-")) {
+ // Only the following Unicode BCP 47 locale identifier subset is accepted:
+ // unicode_locale_id = unicode_language_id
+ // unicode_language_id = unicode_language_subtag
+ // unicode_language_subtag = alpha{2,3}
+ if (locale.length === 2 || locale.length === 3) {
if (!IsASCIIAlphaString(locale))
ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale);
assert(IsStructurallyValidLanguageTag(locale), "2*3ALPHA is a valid language tag");
@@ -483,42 +1142,27 @@ function ValidateAndCanonicalizeLanguageTag(locale) {
// The language subtag is canonicalized to lower case.
locale = callFunction(std_String_toLowerCase, locale);
- // langTagMappings doesn't contain any 2*3ALPHA keys, so we don't need
- // to check for possible replacements in this map.
- assert(!callFunction(std_Object_hasOwnProperty, langTagMappings, locale),
- "langTagMappings contains no 2*3ALPHA mappings");
-
- // Replace deprecated subtags with their preferred values.
- locale = callFunction(std_Object_hasOwnProperty, langSubtagMappings, locale)
- ? langSubtagMappings[locale]
- : locale;
- assert(locale === CanonicalizeLanguageTag(locale), "expected same canonicalization");
+ // updateLocaleIdMappings may modify tags containing only |language|
+ // subtags, if the language is in |complexLanguageMappings|, so we need
+ // to handle that case first.
+ if (!hasOwn(locale, complexLanguageMappings)) {
+ // Replace deprecated subtags with their preferred values.
+ locale = hasOwn(locale, languageMappings)
+ ? languageMappings[locale]
+ : locale;
+ assert(locale === CanonicalizeLanguageTag(locale), "expected same canonicalization");
- return locale;
+ return locale;
+ }
}
- if (!IsStructurallyValidLanguageTag(locale))
+ var localeObj = parseLanguageTag(locale);
+ if (localeObj === null)
ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale);
- return CanonicalizeLanguageTag(locale);
-}
-
-
-function localeContainsNoUnicodeExtensions(locale) {
- // No "-u-", no possible Unicode extension.
- if (callFunction(std_String_indexOf, locale, "-u-") === -1)
- return true;
-
- // "-u-" within privateuse also isn't one.
- if (callFunction(std_String_indexOf, locale, "-u-") > callFunction(std_String_indexOf, locale, "-x-"))
- return true;
-
- // An entirely-privateuse tag doesn't contain extensions.
- if (callFunction(std_String_startsWith, locale, "x-"))
- return true;
+ CanonicalizeLanguageTagObject(localeObj);
- // Otherwise, we have a Unicode extension sequence.
- return false;
+ return StringFromLanguageTagObject(localeObj);
}
@@ -571,11 +1215,13 @@ function DefaultLocaleIgnoringAvailableLocales() {
// If we didn't get a cache hit, compute the candidate default locale and
// cache it. Fall back on the last-ditch locale when necessary.
- var candidate;
- if (!IsStructurallyValidLanguageTag(runtimeDefaultLocale)) {
+ var candidate = parseLanguageTag(runtimeDefaultLocale);
+ if (candidate === null) {
candidate = lastDitchLocale();
} else {
- candidate = CanonicalizeLanguageTag(runtimeDefaultLocale);
+ CanonicalizeLanguageTagObject(candidate);
+
+ candidate = StringFromLanguageTagObject(candidate);
// The default locale must be in [[availableLocales]], and that list
// must not contain any locales with Unicode extension sequences, so
@@ -592,7 +1238,7 @@ function DefaultLocaleIgnoringAvailableLocales() {
assert(IsStructurallyValidLanguageTag(candidate),
"the candidate must be structurally valid");
- assert(localeContainsNoUnicodeExtensions(candidate),
+ assert(startOfUnicodeExtensions(candidate) < 0,
"the candidate must not contain a Unicode extension sequence");
return candidate;
@@ -633,7 +1279,7 @@ function DefaultLocale() {
"the computed default locale must be structurally valid");
assert(locale === CanonicalizeLanguageTag(locale),
"the computed default locale must be canonical");
- assert(localeContainsNoUnicodeExtensions(locale),
+ assert(startOfUnicodeExtensions(locale) < 0,
"the computed default locale must not contain a Unicode extension sequence");
localeCache.defaultLocale = locale;
@@ -674,30 +1320,53 @@ function addSpecialMissingLanguageTags(availableLocales) {
* Spec: ECMAScript Internationalization API Specification, 9.2.1.
*/
function CanonicalizeLocaleList(locales) {
+ // Step 1.
if (locales === undefined)
- return new List();
- var seen = new List();
+ return [];
+
+ // Step 3 (and the remaining steps).
if (typeof locales === "string")
- locales = [locales];
+ return [ValidateAndCanonicalizeLanguageTag(locales)];
+
+ // Step 2.
+ var seen = [];
+
+ // Step 4.
var O = ToObject(locales);
+
+ // Step 5.
var len = ToLength(O.length);
+
+ // Step 6.
var k = 0;
+
+ // Step 7.
while (k < len) {
- // Don't call ToString(k) - SpiderMonkey is faster with integers.
- var kPresent = HasProperty(O, k);
- if (kPresent) {
+ // Steps 7.a-c.
+ if (k in O) {
+ // Step 7.c.i.
var kValue = O[k];
+
+ // Step 7.c.ii.
if (!(typeof kValue === "string" || IsObject(kValue)))
ThrowTypeError(JSMSG_INVALID_LOCALES_ELEMENT);
+
+ // Step 7.c.iii.
var tag = ToString(kValue);
- if (!IsStructurallyValidLanguageTag(tag))
- ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, tag);
- tag = CanonicalizeLanguageTag(tag);
+
+ // Step 7.c.iv.
+ tag = ValidateAndCanonicalizeLanguageTag(tag);
+
+ // Step 7.c.v.
if (callFunction(ArrayIndexOf, seen, tag) === -1)
- callFunction(std_Array_push, seen, tag);
+ _DefineDataProperty(seen, seen.length, tag);
}
+
+ // Step 7.d.
k++;
}
+
+ // Step 8.
return seen;
}
@@ -705,7 +1374,7 @@ function CanonicalizeLocaleList(locales) {
function BestAvailableLocaleHelper(availableLocales, locale, considerDefaultLocale) {
assert(IsStructurallyValidLanguageTag(locale), "invalid BestAvailableLocale locale structure");
assert(locale === CanonicalizeLanguageTag(locale), "non-canonical BestAvailableLocale locale");
- assert(localeContainsNoUnicodeExtensions(locale), "locale must contain no Unicode extensions");
+ assert(startOfUnicodeExtensions(locale) < 0, "locale must contain no Unicode extensions");
// In the spec, [[availableLocales]] is formally a list of all available
// locales. But in our implementation, it's an *incomplete* list, not
@@ -780,28 +1449,37 @@ function BestAvailableLocaleIgnoringDefault(availableLocales, locale) {
* Spec: RFC 4647, section 3.4.
*/
function LookupMatcher(availableLocales, requestedLocales) {
- var i = 0;
- var len = requestedLocales.length;
- var availableLocale;
- var locale, noExtensionsLocale;
- while (i < len && availableLocale === undefined) {
- locale = requestedLocales[i];
- noExtensionsLocale = removeUnicodeExtensions(locale);
- availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale);
- i++;
- }
-
+ // Step 1.
var result = new Record();
- if (availableLocale !== undefined) {
- result.locale = availableLocale;
- if (locale !== noExtensionsLocale) {
- var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE();
- var extensionMatch = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, locale);
- result.extension = extensionMatch[0];
+
+ // Step 2.
+ for (var i = 0; i < requestedLocales.length; i++) {
+ var locale = requestedLocales[i];
+
+ // Step 2.a.
+ var noExtensionsLocale = removeUnicodeExtensions(locale);
+
+ // Step 2.b.
+ var availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale);
+
+ // Step 2.c.
+ if (availableLocale !== undefined) {
+ // Step 2.c.i.
+ result.locale = availableLocale;
+
+ // Step 2.c.ii.
+ if (locale !== noExtensionsLocale)
+ result.extension = getUnicodeExtensions(locale);
+
+ // Step 2.c.iii.
+ return result;
}
- } else {
- result.locale = DefaultLocale();
}
+
+ // Steps 3-4.
+ result.locale = DefaultLocale();
+
+ // Step 5.
return result;
}
@@ -823,73 +1501,73 @@ function BestFitMatcher(availableLocales, requestedLocales) {
/**
* Returns the Unicode extension value subtags for the requested key subtag.
*
- * NOTE: PR to add UnicodeExtensionValue to ECMA-402 isn't yet written.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.5.
*/
function UnicodeExtensionValue(extension, key) {
assert(typeof extension === "string", "extension is a string value");
- assert(function() {
- var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE();
- var extensionMatch = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, extension);
- return extensionMatch !== null && extensionMatch[0] === extension;
- }(), "extension is a Unicode extension subtag");
+ assert(callFunction(std_String_startsWith, extension, "-u-") &&
+ getUnicodeExtensions("und" + extension) === extension,
+ "extension is a Unicode extension subtag");
assert(typeof key === "string", "key is a string value");
- assert(key.length === 2, "key is a Unicode extension key subtag");
// Step 1.
- var size = extension.length;
+ assert(key.length === 2, "key is a Unicode extension key subtag");
// Step 2.
- var searchValue = "-" + key + "-";
+ var size = extension.length;
// Step 3.
- var pos = callFunction(std_String_indexOf, extension, searchValue);
+ var searchValue = "-" + key + "-";
// Step 4.
+ var pos = callFunction(std_String_indexOf, extension, searchValue);
+
+ // Step 5.
if (pos !== -1) {
- // Step 4.a.
+ // Step 5.a.
var start = pos + 4;
- // Step 4.b.
+ // Step 5.b.
var end = start;
- // Step 4.c.
+ // Step 5.c.
var k = start;
- // Steps 4.d-e.
+ // Steps 5.d-e.
while (true) {
- // Step 4.e.i.
+ // Step 5.e.i.
var e = callFunction(std_String_indexOf, extension, "-", k);
- // Step 4.e.ii.
+ // Step 5.e.ii.
var len = e === -1 ? size - k : e - k;
- // Step 4.e.iii.
+ // Step 5.e.iii.
if (len === 2)
break;
- // Step 4.e.iv.
+ // Step 5.e.iv.
if (e === -1) {
end = size;
break;
}
- // Step 4.e.v.
+ // Step 5.e.v.
end = e;
k = e + 1;
}
- // Step 4.f.
+ // Step 5.f.
return callFunction(String_substring, extension, start, end);
}
- // Step 5.
+ // Step 6.
searchValue = "-" + key;
- // Steps 6-7.
+ // Steps 7-8.
if (callFunction(std_String_endsWith, extension, searchValue))
return "";
- // Step 8 (implicit).
+ // Step 9 (implicit).
}
/**
@@ -899,11 +1577,9 @@ function UnicodeExtensionValue(extension, key) {
* caller's relevant extensions and locale data as well as client-provided
* options into consideration.
*
- * Spec: ECMAScript Internationalization API Specification, 9.2.5.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.6.
*/
function ResolveLocale(availableLocales, requestedLocales, options, relevantExtensionKeys, localeData) {
- /*jshint laxbreak: true */
-
// Steps 1-3.
var matcher = options.localeMatcher;
var r = (matcher === "lookup")
@@ -912,79 +1588,82 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
// Step 4.
var foundLocale = r.locale;
-
- // Step 5 (Not applicable in this implementation).
var extension = r.extension;
- // Steps 6-7.
+ // Step 5.
var result = new Record();
+
+ // Step 6.
result.dataLocale = foundLocale;
- // Step 8.
+ // Step 7.
var supportedExtension = "-u";
// In this implementation, localeData is a function, not an object.
var localeDataProvider = localeData();
- // Steps 9-12.
+ // Step 8.
for (var i = 0; i < relevantExtensionKeys.length; i++) {
- // Steps 12.a-c.
var key = relevantExtensionKeys[i];
- // Steps 12.b-d (The locale data is only computed when needed).
+ // Steps 8.a-h (The locale data is only computed when needed).
var keyLocaleData = undefined;
var value = undefined;
// Locale tag may override.
- // Step 12.e.
+ // Step 8.g.
var supportedExtensionAddition = "";
- // Step 12.f.
+ // Step 8.h.
if (extension !== undefined) {
- // NB: The step annotations don't yet match the ES2017 Intl draft,
- // 94045d234762ad107a3d09bb6f7381a65f1a2f9b, because the PR to add
- // the new UnicodeExtensionValue abstract operation still needs to
- // be written.
-
- // Step 12.f.i.
+ // Step 8.h.i.
var requestedValue = UnicodeExtensionValue(extension, key);
- // Step 12.f.ii.
+ // Step 8.h.ii.
if (requestedValue !== undefined) {
- // Steps 12.b-c.
+ // Steps 8.a-d.
keyLocaleData = callFunction(localeDataProvider[key], null, foundLocale);
- // Step 12.f.ii.1.
+ // Step 8.h.ii.1.
if (requestedValue !== "") {
- // Step 12.f.ii.1.a.
+ // Step 8.h.ii.1.a.
if (callFunction(ArrayIndexOf, keyLocaleData, requestedValue) !== -1) {
value = requestedValue;
supportedExtensionAddition = "-" + key + "-" + value;
}
} else {
- // Step 12.f.ii.2.
+ // Step 8.h.ii.2.
// According to the LDML spec, if there's no type value,
// and true is an allowed value, it's used.
- if (callFunction(ArrayIndexOf, keyLocaleData, "true") !== -1)
+ if (callFunction(ArrayIndexOf, keyLocaleData, "true") !== -1) {
value = "true";
+ supportedExtensionAddition = "-" + key;
+ }
}
}
}
// Options override all.
- // Step 12.g.i.
+ // Step 8.i.i.
var optionsValue = options[key];
- // Step 12.g, 12.gg.ii.
+ // Step 8.i.ii.
+ assert(typeof optionsValue === "string" ||
+ optionsValue === undefined ||
+ optionsValue === null,
+ "unexpected type for options value");
+
+ // Steps 8.i, 8.i.iii.1.
if (optionsValue !== undefined && optionsValue !== value) {
- // Steps 12.b-c.
+ // Steps 8.a-d.
if (keyLocaleData === undefined)
keyLocaleData = callFunction(localeDataProvider[key], null, foundLocale);
+ // Step 8.i.iii.
if (callFunction(ArrayIndexOf, keyLocaleData, optionsValue) !== -1) {
value = optionsValue;
supportedExtensionAddition = "";
@@ -993,27 +1672,29 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
// Locale data provides default value.
if (value === undefined) {
- // Steps 12.b-d.
+ // Steps 8.a-f.
value = keyLocaleData === undefined
? callFunction(localeDataProvider.default[key], null, foundLocale)
: keyLocaleData[0];
}
- // Steps 12.h-j.
+ // Step 8.j.
assert(typeof value === "string" || value === null, "unexpected locale data value");
result[key] = value;
+
+ // Step 8.k.
supportedExtension += supportedExtensionAddition;
}
- // Step 13.
+ // Step 9.
if (supportedExtension.length > 2) {
assert(!callFunction(std_String_startsWith, foundLocale, "x-"),
"unexpected privateuse-only locale returned from ICU");
- // Step 13.a.
+ // Step 9.a.
var privateIndex = callFunction(std_String_indexOf, foundLocale, "-x-");
- // Steps 13.b-c.
+ // Steps 9.b-c.
if (privateIndex === -1) {
foundLocale += supportedExtension;
} else {
@@ -1022,19 +1703,19 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
foundLocale = preExtension + supportedExtension + postExtension;
}
- // Step 13.d.
+ // Step 9.d.
assert(IsStructurallyValidLanguageTag(foundLocale), "invalid locale after concatenation");
- // Step 13.e (Not required in this implementation, because we don't
+ // Step 9.e (Not required in this implementation, because we don't
// canonicalize Unicode extension subtags).
assert(foundLocale === CanonicalizeLanguageTag(foundLocale), "same locale with extension");
}
- // Step 14.
+ // Step 10.
result.locale = foundLocale;
- // Step 15.
+ // Step 11.
return result;
}
@@ -1044,31 +1725,29 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
* matching (possibly fallback) locale. Locales appear in the same order in the
* returned list as in the input list.
*
- * Spec: ECMAScript Internationalization API Specification, 9.2.6.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.7.
*/
function LookupSupportedLocales(availableLocales, requestedLocales) {
- // Steps 1-2.
- var len = requestedLocales.length;
- var subset = new List();
+ // Step 1.
+ var subset = [];
- // Steps 3-4.
- var k = 0;
- while (k < len) {
- // Steps 4.a-b.
- var locale = requestedLocales[k];
+ // Step 2.
+ for (var i = 0; i < requestedLocales.length; i++) {
+ var locale = requestedLocales[i];
+
+ // Step 2.a.
var noExtensionsLocale = removeUnicodeExtensions(locale);
- // Step 4.c-d.
+ // Step 2.b.
var availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale);
- if (availableLocale !== undefined)
- callFunction(std_Array_push, subset, locale);
- // Step 4.e.
- k++;
+ // Step 2.c.
+ if (availableLocale !== undefined)
+ _DefineDataProperty(subset, subset.length, locale);
}
- // Steps 5-6.
- return callFunction(std_Array_slice, subset, 0);
+ // Step 3.
+ return subset;
}
@@ -1077,7 +1756,7 @@ function LookupSupportedLocales(availableLocales, requestedLocales) {
* matching (possibly fallback) locale. Locales appear in the same order in the
* returned list as in the input list.
*
- * Spec: ECMAScript Internationalization API Specification, 9.2.7.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.8.
*/
function BestFitSupportedLocales(availableLocales, requestedLocales) {
// don't have anything better
@@ -1090,19 +1769,17 @@ function BestFitSupportedLocales(availableLocales, requestedLocales) {
* matching (possibly fallback) locale. Locales appear in the same order in the
* returned list as in the input list.
*
- * Spec: ECMAScript Internationalization API Specification, 9.2.8.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.9.
*/
function SupportedLocales(availableLocales, requestedLocales, options) {
- /*jshint laxbreak: true */
-
// Step 1.
var matcher;
if (options !== undefined) {
- // Steps 1.a-b.
+ // Step 1.a.
options = ToObject(options);
- matcher = options.localeMatcher;
- // Step 1.c.
+ // Step 1.b.
+ matcher = options.localeMatcher;
if (matcher !== undefined) {
matcher = ToString(matcher);
if (matcher !== "lookup" && matcher !== "best fit")
@@ -1110,12 +1787,12 @@ function SupportedLocales(availableLocales, requestedLocales, options) {
}
}
- // Steps 2-3.
+ // Steps 2-5.
var subset = (matcher === undefined || matcher === "best fit")
? BestFitSupportedLocales(availableLocales, requestedLocales)
: LookupSupportedLocales(availableLocales, requestedLocales);
- // Step 4.
+ // Steps 6-7.
for (var i = 0; i < subset.length; i++) {
_DefineDataProperty(subset, i, subset[i],
ATTR_ENUMERABLE | ATTR_NONCONFIGURABLE | ATTR_NONWRITABLE);
@@ -1123,7 +1800,7 @@ function SupportedLocales(availableLocales, requestedLocales, options) {
_DefineDataProperty(subset, "length", subset.length,
ATTR_NONENUMERABLE | ATTR_NONCONFIGURABLE | ATTR_NONWRITABLE);
- // Step 5.
+ // Step 8.
return subset;
}
@@ -1133,7 +1810,7 @@ function SupportedLocales(availableLocales, requestedLocales, options) {
* the required type, checks whether it is one of a list of allowed values,
* and fills in a fallback value if necessary.
*
- * Spec: ECMAScript Internationalization API Specification, 9.2.9.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.10.
*/
function GetOption(options, property, type, values, fallback) {
// Step 1.