summaryrefslogtreecommitdiff
path: root/js/src/regexp/import-irregexp.py
blob: 870387232cf776ed95eed97d3e9c10c741a73bf5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python3

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

# This script handles all the mechanical steps of importing irregexp from v8:
#
# 1. Acquire the source: either from github, or optionally from a local copy of v8.
# 2. Copy the contents of v8/src/regexp into js/src/regexp
#    - Exclude files that we have chosen not to import.
# 3. While doing so, update #includes:
#    - Change "src/regexp/*" to "regexp/*".
#    - Remove other v8-specific headers completely.
# 4. Add '#include "regexp/regexp-shim.h"' in the necessary places.
# 5. Update the VERSION file to include the correct git hash.
#
# Usage:
#  cd path/to/js/src/regexp
#  ./import-irregexp.py --path path/to/v8/src/regexp
#
# Alternatively, without the --path argument, import-irregexp.py will
# clone v8 from github into a temporary directory.
#
# After running this script, changes to the shim code may be necessary
# to account for changes in upstream irregexp.

import os
import re
import subprocess
import sys
from pathlib import Path


def get_hash(path):
    """Return the hash of the git revision checked out at `path`.

    Runs `git rev-parse HEAD` with `path` as its working directory and
    returns the output with trailing whitespace stripped.

    Raises subprocess.CalledProcessError if git exits with a non-zero
    status, and OSError if `path` or the git executable cannot be found.
    """
    # Hand the directory to the child process through `cwd` rather than
    # calling os.chdir(): the working directory of this script is then
    # never modified, not even when git fails and an exception propagates.
    command = ['git', 'rev-parse', 'HEAD']
    result = subprocess.check_output(command, cwd=str(path), encoding='utf-8')
    return result.rstrip()


def copy_and_update_includes(src_path, dst_path):
    """Copy the file at `src_path` to `dst_path`, rewriting its #includes.

    Line by line:
      * '#include "src/regexp' is rewritten to '#include "regexp'.
      * Any other line containing '#include "src/' is dropped.
      * Every other line is copied unchanged.

    If the file name of `src_path` is listed in `need_shim`, a single
    '#include "regexp/regexp-shim.h"' line is inserted as well.

    `src_path` must be a pathlib.Path (its `.name` is inspected);
    `dst_path` may be anything accepted by str().
    """
    # List of header files that need to include the shim header
    need_shim = ['property-sequences.h',
                 'regexp-ast.h',
                 'regexp-bytecode-peephole.h',
                 'regexp-bytecodes.h',
                 'regexp-dotprinter.h',
                 'regexp.h',
                 'regexp-macro-assembler.h',
                 'regexp-stack.h',
                 'special-case.h']
    shim_include = '#include "regexp/regexp-shim.h"\n'

    # 1. Rewrite includes of V8 regexp headers:
    regexp_include = re.compile('#include "src/regexp')
    regexp_include_new = '#include "regexp'

    # 2. Remove includes of other V8 headers
    other_include = re.compile('#include "src/')

    # 3. If needed, add '#include "regexp/regexp-shim.h"'.
    #    Note: We get a little fancy to ensure that header files are
    #    in alphabetic order. `need_to_add_shim` is true if we still
    #    have to add the shim header in this file. `adding_shim_now`
    #    is true if we have found a '#include "src/*' and we are just
    #    waiting to find something alphabetically smaller (or an empty
    #    line) so that we can insert the shim header in the right place.
    need_to_add_shim = src_path.name in need_shim
    adding_shim_now = False

    # The context manager closes both files even if an error occurs
    # part-way through. The encoding is spelled out so the copy does not
    # depend on the locale of the machine running the import.
    with open(str(src_path), 'r', encoding='utf-8') as src, \
            open(str(dst_path), 'w', encoding='utf-8') as dst:
        for line in src:
            if adding_shim_now:
                # The comparison is done against the *original* spelling
                # ("src/regexp/...") because `line` has not been
                # rewritten yet at this point.
                if (line == '\n' or
                        line > '#include "src/regexp/regexp-shim.h"'):
                    dst.write(shim_include)
                    need_to_add_shim = False
                    adding_shim_now = False

            if regexp_include.search(line):
                dst.write(re.sub(regexp_include, regexp_include_new, line))
            elif other_include.search(line):
                if need_to_add_shim:
                    adding_shim_now = True
            else:
                dst.write(line)

        # The file ended while we were still waiting for a place to put
        # the shim header (the removed include was the last thing in the
        # file): emit it now rather than silently dropping it.
        if adding_shim_now:
            dst.write(shim_include)


def import_from(srcdir, dstdir):
    """Import the regexp sources in `srcdir` into `dstdir`.

    Every regular file directly inside `srcdir` (sub-directories are
    skipped) whose name is not in the `excluded` list is copied to
    `dstdir` through copy_and_update_includes(). Afterwards the file
    `dstdir`/VERSION is rewritten to record the git revision of `srcdir`.

    Both arguments must be pathlib.Path objects.
    """
    excluded = ['OWNERS',
                'regexp.cc',
                'regexp-utils.cc',
                'regexp-utils.h',
                'regexp-macro-assembler-arch.h']

    for entry in srcdir.iterdir():
        if entry.is_dir():
            continue
        if str(entry.name) in excluded:
            continue
        copy_and_update_includes(entry, dstdir / entry.name)

    # Update VERSION file
    revision = get_hash(srcdir)
    # `with` guarantees the file is flushed and closed before returning.
    with open(str(dstdir / 'VERSION'), 'w') as version_file:
        version_file.write('Imported using import-irregexp.py from:\n')
        version_file.write('https://github.com/v8/v8/tree/%s/src/regexp\n'
                           % revision)


if __name__ == '__main__':
    # Script entry point: import irregexp either from a local v8 checkout
    # (--path) or from a fresh shallow clone of v8 made in a temporary
    # directory that is removed again when the import is done.
    import argparse
    import tempfile

    # This script should be run from js/src/regexp to work correctly.
    current_path = Path(os.getcwd())
    expected_path = 'js/src/regexp'
    if not current_path.match(expected_path):
        raise RuntimeError('%s must be run from %s' % (sys.argv[0],
                                                       expected_path))

    parser = argparse.ArgumentParser(description='Import irregexp from v8')
    parser.add_argument('-p', '--path', help='path to v8/src/regexp')
    args = parser.parse_args()

    if args.path:
        src_path = Path(args.path)

        # Sanity check: a real v8/src/regexp directory contains regexp.h.
        if not (src_path / 'regexp.h').exists():
            print('Usage:\n  import-irregexp.py --path <path/to/v8/src/regexp>')
            sys.exit(1)
        import_from(src_path, current_path)
        sys.exit(0)

    with tempfile.TemporaryDirectory() as tempdir:
        v8_git = 'https://github.com/v8/v8.git'
        # Run git directly with an argument list (no shell involved, so a
        # temporary path containing spaces is safe) and let check_call
        # raise CalledProcessError if the clone fails, instead of going on
        # to import from a directory that was never populated.
        subprocess.check_call(['git', 'clone', '--depth', '1',
                               v8_git, tempdir])
        src_path = Path(tempdir) / 'src/regexp'
        import_from(src_path, current_path)