1 files changed, 0 insertions, 117 deletions
diff --git a/js/src/new-regexp/special-case.h b/js/src/new-regexp/special-case.h
deleted file mode 100644
index 31dfd78582..0000000000
--- a/js/src/new-regexp/special-case.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_SPECIAL_CASE_H_
-#define V8_REGEXP_SPECIAL_CASE_H_
-
-#ifdef V8_INTL_SUPPORT
-#include "new-regexp/regexp-shim.h"
-
-#include "unicode/uchar.h"
-#include "unicode/uniset.h"
-#include "unicode/unistr.h"
-
-namespace v8 {
-namespace internal {
-
-// Sets of Unicode characters that need special handling under "i" mode
-
-// For non-unicode ignoreCase matches (aka "i", not "iu"), ECMA 262
-// defines slightly different case-folding rules than Unicode. An
-// input character should match a pattern character if the result of
-// the Canonicalize algorithm is the same for both characters.
-//
-// Roughly speaking, for "i" regexps, Canonicalize(c) is the same as
-// c.toUpperCase(), unless a) c.toUpperCase() is a multi-character
-// string, or b) c is non-ASCII, and c.toUpperCase() is ASCII. See
-// https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch for
-// the precise definition.
-//
-// While compiling such regular expressions, we need to compute the
-// set of characters that should match a given input character. (See
-// GetCaseIndependentLetters and CharacterRange::AddCaseEquivalents.)
-// For almost all characters, this can be efficiently computed using
-// UnicodeSet::closeOver(USET_CASE_INSENSITIVE). These sets represent
-// the remaining special cases.
-//
-// For a character c, the rules are as follows:
-//
-// 1. If c is in neither IgnoreSet nor SpecialAddSet, then calling
-//    UnicodeSet::closeOver(USET_CASE_INSENSITIVE) on a UnicodeSet
-//    containing c will produce the set of characters that should
-//    match /c/i (or /[c]/i), and only those characters.
-//
-// 2. If c is in IgnoreSet, then the only character it should match is
-//    itself. However, closeOver will add additional incorrect
-//    matches. For example, consider SHARP S: 'ß' (U+00DF) and 'ẞ'
-//    (U+1E9E). Although closeOver('ß') = "ßẞ", uppercase('ß') is
-//    "SS".  Step 3.e therefore requires that 'ß' canonicalizes to
-//    itself, and should not match 'ẞ'. In these cases, we can skip
-//    the closeOver entirely, because it will never add an equivalent
-//    character.
-//
-// 3. If c is in SpecialAddSet, then it should match at least one
-//    character other than itself. However, closeOver will add at
-//    least one additional incorrect match. For example, consider the
-//    letter 'k'. Closing over 'k' gives "kKK" (lowercase k, uppercase
-//    K, U+212A KELVIN SIGN). However, because of step 3.g, KELVIN
-//    SIGN should not match either of the other two characters. As a
-//    result, "k" and "K" are in SpecialAddSet (and KELVIN SIGN is in
-//    IgnoreSet). To find the correct matches for characters in
-//    SpecialAddSet, we closeOver the original character, but filter
-//    out the results that do not have the same canonical value.
-//
-// The contents of these sets are calculated at build time by
-// src/regexp/gen-regexp-special-case.cc, which generates
-// gen/src/regexp/special-case.cc. This is done by iterating over the
-// result of closeOver for each BMP character, and finding sets for
-// which at least one character has a different canonical value than
-// another character. Characters that match no other characters in
-// their equivalence class are added to IgnoreSet. Characters that
-// match at least one other character are added to SpecialAddSet.
-
-class RegExpCaseFolding final : public AllStatic {
- public:
-  static const icu::UnicodeSet& IgnoreSet();
-  static const icu::UnicodeSet& SpecialAddSet();
-
-  // This implements ECMAScript 2020 21.2.2.8.2 (Runtime Semantics:
-  // Canonicalize) step 3, which is used to determine whether
-  // characters match when ignoreCase is true and unicode is false.
-  static UChar32 Canonicalize(UChar32 ch) {
-    // a. Assert: ch is a UTF-16 code unit.
-    CHECK_LE(ch, 0xffff);
-
-    // b. Let s be the String value consisting of the single code unit ch.
-    icu::UnicodeString s(ch);
-
-    // c. Let u be the same result produced as if by performing the algorithm
-    // for String.prototype.toUpperCase using s as the this value.
-    // d. Assert: Type(u) is String.
-    icu::UnicodeString& u = s.toUpper();
-
-    // e. If u does not consist of a single code unit, return ch.
-    if (u.length() != 1) {
-      return ch;
-    }
-
-    // f. Let cu be u's single code unit element.
-    UChar32 cu = u.char32At(0);
-
-    // g. If the value of ch >= 128 and the value of cu < 128, return ch.
-    if (ch >= 128 && cu < 128) {
-      return ch;
-    }
-
-    // h. Return cu.
-    return cu;
-  }
-};
-
-}  // namespace internal
-}  // namespace v8
-
-#endif  // V8_INTL_SUPPORT
-
-#endif  // V8_REGEXP_SPECIAL_CASE_H_