diff --git a/Makefile b/Makefile index 231bbe11f4c011..c89c0441a47e30 100644 --- a/Makefile +++ b/Makefile @@ -1504,7 +1504,6 @@ LINT_CPP_FILES = $(filter-out $(LINT_CPP_EXCLUDE), $(wildcard \ test/fixtures/*.c \ test/js-native-api/*/*.cc \ test/node-api/*/*.cc \ - tools/js2c.cc \ tools/icu/*.cc \ tools/icu/*.h \ tools/code_cache/*.cc \ diff --git a/configure.py b/configure.py index 6182537bfa40fe..0af32965226f22 100755 --- a/configure.py +++ b/configure.py @@ -2698,7 +2698,7 @@ def make_bin_override(): print_verbose(output) -# Dump as JSON to allow js2c.cc read it as a simple json file. +# Dump as JSON to allow js2c to read it as a simple json file. write('config.gypi', do_not_edit + json.dumps(output, indent=2) + '\n') diff --git a/lib/internal/bootstrap/realm.js b/lib/internal/bootstrap/realm.js index 2ccceb493e68bb..c48192369bded3 100644 --- a/lib/internal/bootstrap/realm.js +++ b/lib/internal/bootstrap/realm.js @@ -31,7 +31,7 @@ // Internal JavaScript module loader: // - BuiltinModule: a minimal module system used to load the JavaScript core // modules found in lib/**/*.js and deps/**/*.js. All core modules are -// compiled into the node binary via node_javascript.cc generated by js2c.cc, +// compiled into the node binary via node_javascript.cc generated by js2c, // so they can be loaded faster without the cost of I/O. This class makes the // lib/internal/*, deps/internal/* modules and internalBinding() available by // default to core modules, and lets the core modules require itself via diff --git a/node.gyp b/node.gyp index bd77943b105173..f26d1d9d1340cf 100644 --- a/node.gyp +++ b/node.gyp @@ -43,7 +43,7 @@ 'ossfuzz' : 'false', 'linked_module_files': [ ], - # We list the deps/ files out instead of globbing them in js2c.cc since we + # We list the deps/ files out instead of globbing them in js2c.rs since we # only include a subset of all the files under these directories. # The lengths of their file names combined should not exceed the # Windows command length limit or there would be an error. @@ -1552,40 +1552,55 @@ }, # nop { 'target_name': 'node_js2c', - 'type': 'executable', + 'type': 'none', 'toolsets': ['host'], - 'include_dirs': [ - 'tools', - 'src', - ], - 'sources': [ - 'tools/js2c.cc', - 'tools/executable_wrapper.h', - 'src/embedded_data.h', - 'src/embedded_data.cc', - 'src/builtin_info.h', - 'src/builtin_info.cc', + 'variables': { + 'node_js2c_rustc_flags': [ + '--edition=2021', + '--crate-name', + 'node_js2c', + '-C', + 'opt-level=2', + ], + }, + 'actions': [ + { + 'action_name': 'build_node_js2c', + 'inputs': [ + 'tools/js2c.rs', + ], + 'outputs': [ + '<(node_js2c_exec)', + ], + 'action': [ + 'rustc', + '<@(node_js2c_rustc_flags)', + 'tools/js2c.rs', + '-o', + '<@(_outputs)', + ], + }, ], 'conditions': [ - [ 'OS=="mac"', { - 'libraries': [ '-framework CoreFoundation -framework Security' ], - }], - [ 'node_shared_simdutf=="false" and node_use_bundled_v8!="false"', { - 'dependencies': [ 'tools/v8_gypfiles/v8.gyp:simdutf#host' ], - }], - [ 'node_shared_libuv=="false"', { - 'dependencies': [ 'deps/uv/uv.gyp:libuv#host' ], - }], [ 'OS in "linux mac openharmony"', { - 'defines': ['NODE_JS2C_USE_STRING_LITERALS'], + 'variables': { + 'node_js2c_rustc_flags+': [ + '--cfg', + 'node_js2c_use_string_literals', + ], + }, }], [ 'debug_node=="true"', { - 'cflags!': [ '-O3' ], - 'cflags': [ '-g', '-O0' ], - 'defines': [ 'DEBUG' ], - 'xcode_settings': { - 'OTHER_CFLAGS': [ - '-g', '-O0' + 'variables': { + 'node_js2c_rustc_flags!': [ + '-C', + 'opt-level=2', + ], + 'node_js2c_rustc_flags+': [ + '-C', + 'debuginfo=2', + '-C', + 'opt-level=0', ], }, }], diff --git a/src/node_builtins.h b/src/node_builtins.h index e4af1f42f4442b..0651e1e4a944af 100644 --- a/src/node_builtins.h +++ b/src/node_builtins.h @@ -79,7 +79,7 @@ using BuiltinSourceMap = std::map; using BuiltinCodeCacheMap = std::unordered_map; -// Generated by tools/js2c.cc as node_javascript.cc +// Generated by tools/js2c as node_javascript.cc void RegisterExternalReferencesForInternalizedBuiltinCode( ExternalReferenceRegistry* registry); @@ -139,7 +139,7 @@ class NODE_EXTERN_PRIVATE BuiltinLoader { // Only allow access from friends. friend class CodeCacheBuilder; - // Generated by tools/js2c.cc as node_javascript.cc + // Generated by tools/js2c as node_javascript.cc void LoadJavaScriptSource(); // Loads data into source_ UnionBytes GetConfig(); // Return data for config.gypi diff --git a/tools/js2c.cc b/tools/js2c.cc deleted file mode 100644 index 2cb09f8e1d7ba6..00000000000000 --- a/tools/js2c.cc +++ /dev/null @@ -1,964 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "builtin_info.h" -#include "embedded_data.h" -#include "executable_wrapper.h" -#include "simdutf.h" -#include "uv.h" - -#if defined(_WIN32) -#include // _S_IREAD _S_IWRITE -#ifndef S_IRUSR -#define S_IRUSR _S_IREAD -#endif // S_IRUSR -#ifndef S_IWUSR -#define S_IWUSR _S_IWRITE -#endif // S_IWUSR -#endif -namespace node { -namespace js2c { -int Main(int argc, char* argv[]); - -static bool is_verbose = false; - -void Debug(const char* format, ...) { - va_list arguments; - va_start(arguments, format); - if (is_verbose) { - vfprintf(stderr, format, arguments); - } - va_end(arguments); -} - -void PrintUvError(const char* syscall, const char* filename, int error) { - fprintf(stderr, "[%s] %s: %s\n", syscall, filename, uv_strerror(error)); -} - -int GetStats(const char* path, std::function func) { - uv_fs_t req; - int r = uv_fs_stat(nullptr, &req, path, nullptr); - if (r == 0) { - func(static_cast(req.ptr)); - } - uv_fs_req_cleanup(&req); - return r; -} - -bool IsDirectory(const std::string& filename, int* error) { - bool result = false; - *error = GetStats(filename.c_str(), [&](const uv_stat_t* stats) { - result = !!(stats->st_mode & S_IFDIR); - }); - if (*error != 0) { - PrintUvError("stat", filename.c_str(), *error); - } - return result; -} - -size_t GetFileSize(const std::string& filename, int* error) { - size_t result = 0; - *error = GetStats(filename.c_str(), - [&](const uv_stat_t* stats) { result = stats->st_size; }); - return result; -} - -constexpr bool FilenameIsConfigGypi(const std::string_view path) { - return path == "config.gypi" || path.ends_with("/config.gypi"); -} - -typedef std::vector FileList; -typedef std::map FileMap; - -bool SearchFiles(const std::string& dir, - FileMap* file_map, - std::string_view extension) { - uv_fs_t scan_req; - int result = uv_fs_scandir(nullptr, &scan_req, dir.c_str(), 0, nullptr); - bool errored = false; - if (result < 0) { - PrintUvError("scandir", dir.c_str(), result); - errored = true; - } else { - auto it = file_map->insert({std::string(extension), FileList()}).first; - FileList& files = it->second; - files.reserve(files.size() + result); - uv_dirent_t dent; - while (true) { - result = uv_fs_scandir_next(&scan_req, &dent); - if (result == UV_EOF) { - break; - } - - if (result != 0) { - PrintUvError("scandir_next", dir.c_str(), result); - errored = true; - break; - } - - std::string path = dir + '/' + dent.name; - if (path.ends_with(extension)) { - files.emplace_back(path); - continue; - } - if (!IsDirectory(path, &result)) { - if (result == 0) { // It's a file, no need to search further. - continue; - } else { - errored = true; - break; - } - } - - if (!SearchFiles(path, file_map, extension)) { - errored = true; - break; - } - } - } - - uv_fs_req_cleanup(&scan_req); - return !errored; -} - -constexpr std::string_view kMjsSuffix = ".mjs"; -constexpr std::string_view kJsSuffix = ".js"; -constexpr std::string_view kGypiSuffix = ".gypi"; -constexpr std::string_view depsPrefix = "deps/"; -constexpr std::string_view libPrefix = "lib/"; - -constexpr std::string_view HasAllowedExtensions( - const std::string_view filename) { - for (const auto& ext : {kGypiSuffix, kJsSuffix, kMjsSuffix}) { - if (filename.ends_with(ext)) { - return ext; - } - } - return {}; -} - -using Fragment = std::vector; -using Fragments = std::vector>; - -std::vector Join(const Fragments& fragments, - const std::string& separator) { - size_t length = separator.size() * (fragments.size() - 1); - for (size_t i = 0; i < fragments.size(); ++i) { - length += fragments[i].size(); - } - std::vector buf(length, 0); - size_t cursor = 0; - for (size_t i = 0; i < fragments.size(); ++i) { - const Fragment& fragment = fragments[i]; - // Avoid using snprintf on large chunks of data because it's much slower. - // It's fine to use it on small amount of data though. - if (i != 0) { - memcpy(buf.data() + cursor, separator.c_str(), separator.size()); - cursor += separator.size(); - } - memcpy(buf.data() + cursor, fragment.data(), fragment.size()); - cursor += fragment.size(); - } - buf.resize(cursor); - return buf; -} - -const char* kTemplate = R"( -#include "env-inl.h" -#include "node_builtins.h" -#include "node_external_reference.h" -#include "node_internals.h" - -namespace node { - -namespace builtins { - -%.*s -namespace { -const ThreadsafeCopyOnWrite global_source_map { - BuiltinSourceMap { -%.*s - } // BuiltinSourceMap -}; // ThreadsafeCopyOnWrite -} // anonymous namespace - -void BuiltinLoader::LoadJavaScriptSource() { - source_ = global_source_map; -} - -void RegisterExternalReferencesForInternalizedBuiltinCode( - ExternalReferenceRegistry* registry) { -%.*s -} - -UnionBytes BuiltinLoader::GetConfig() { - return UnionBytes(&config_resource); -} - -} // namespace builtins - -} // namespace node -)"; - -Fragment Format(const Fragments& definitions, - const Fragments& initializers, - const Fragments& registrations) { - std::vector def_buf = Join(definitions, "\n"); - size_t def_size = def_buf.size(); - std::vector init_buf = Join(initializers, "\n"); - size_t init_size = init_buf.size(); - std::vector reg_buf = Join(registrations, "\n"); - size_t reg_size = reg_buf.size(); - - size_t result_size = - def_size + init_size + reg_size + strlen(kTemplate) + 100; - std::vector result(result_size, 0); - int r = snprintf(result.data(), - result_size, - kTemplate, - static_cast(def_buf.size()), - def_buf.data(), - static_cast(init_buf.size()), - init_buf.data(), - static_cast(reg_buf.size()), - reg_buf.data()); - result.resize(r); - return result; -} - -std::vector ReadFileSync(const char* path, size_t size, int* error) { - uv_fs_t req; - Debug("ReadFileSync %s with size %zu\n", path, size); - - uv_file file = uv_fs_open(nullptr, &req, path, O_RDONLY, 0, nullptr); - if (req.result < 0) { - uv_fs_req_cleanup(&req); - *error = req.result; - return std::vector(); - } - uv_fs_req_cleanup(&req); - - std::vector contents(size); - size_t offset = 0; - - while (offset < size) { - uv_buf_t buf = uv_buf_init(contents.data() + offset, size - offset); - int bytes_read = uv_fs_read(nullptr, &req, file, &buf, 1, offset, nullptr); - offset += bytes_read; - *error = req.result; - uv_fs_req_cleanup(&req); - if (*error < 0) { - uv_fs_close(nullptr, &req, file, nullptr); - // We can't do anything if uv_fs_close returns error, so just return. - return std::vector(); - } - if (bytes_read <= 0) { - break; - } - } - assert(offset == size); - - *error = uv_fs_close(nullptr, &req, file, nullptr); - return contents; -} - -int WriteFileSync(const std::vector& out, const char* path) { - Debug("WriteFileSync %zu bytes to %s\n", out.size(), path); - uv_fs_t req; - uv_file file = uv_fs_open(nullptr, - &req, - path, - UV_FS_O_CREAT | UV_FS_O_WRONLY | UV_FS_O_TRUNC, - S_IWUSR | S_IRUSR, - nullptr); - int err = req.result; - uv_fs_req_cleanup(&req); - if (err < 0) { - return err; - } - - uv_buf_t buf = uv_buf_init(const_cast(out.data()), out.size()); - err = uv_fs_write(nullptr, &req, file, &buf, 1, 0, nullptr); - uv_fs_req_cleanup(&req); - - int r = uv_fs_close(nullptr, &req, file, nullptr); - uv_fs_req_cleanup(&req); - if (err < 0) { - // We can't do anything if uv_fs_close returns error, so just return. - return err; - } - return r; -} - -int WriteIfChanged(const Fragment& out, const std::string& dest) { - Debug("output size %zu\n", out.size()); - - int error = 0; - size_t size = GetFileSize(dest, &error); - if (error != 0 && error != UV_ENOENT) { - return error; - } - Debug("existing size %zu\n", size); - - bool changed = true; - // If it's not the same size, the file is definitely changed so we'll - // just proceed to update. Otherwise check the content before deciding - // whether we want to write it. - if (error != UV_ENOENT && size == out.size()) { - std::vector content = ReadFileSync(dest.c_str(), size, &error); - if (error == 0) { // In case of error, always write the file. - changed = (memcmp(content.data(), out.data(), size) != 0); - } - } - if (!changed) { - Debug("No change, return\n"); - return 0; - } - return WriteFileSync(out, dest.c_str()); -} - -std::string GetFileId(const std::string& filename) { - size_t end = filename.size(); - size_t start = 0; - std::string prefix; - // Strip .mjs and .js suffix - if (filename.ends_with(kMjsSuffix)) { - end -= kMjsSuffix.size(); - } else if (filename.ends_with(kJsSuffix)) { - end -= kJsSuffix.size(); - } - - // deps/acorn/acorn/dist/acorn.js -> internal/deps/acorn/acorn/dist/acorn - if (filename.starts_with(depsPrefix)) { - start = depsPrefix.size(); - prefix = "internal/deps/"; - } else if (filename.starts_with(libPrefix)) { - // lib/internal/url.js -> internal/url - start = libPrefix.size(); - prefix = ""; - } - - return prefix + std::string(filename.begin() + start, filename.begin() + end); -} - -std::string GetVariableName(const std::string& id) { - std::string result = id; - size_t length = result.size(); - - for (size_t i = 0; i < length; ++i) { - if (result[i] == '.' || result[i] == '-' || result[i] == '/') { - result[i] = '_'; - } - } - return result; -} - -// The function returns a string buffer and an array of -// offsets. The string is just "0,1,2,3,...,65535,". -// The second array contain the offsets indicating the -// start of each substring ("0,", "1,", etc.) and the final -// offset points just beyond the end of the string. -// 382106 is the length of the string "0,1,2,3,...,65535,". -// 65537 is 2**16 + 1 -// This function could be constexpr, but it might become too expensive to -// compile. -std::pair, std::array> -precompute_string() { - // the string "0,1,2,3,...,65535,". - std::array str; - // the offsets in the string pointing at the beginning of each substring - std::array off; - off[0] = 0; - char* p = &str[0]; - constexpr auto const_int_to_str = [](uint16_t value, char* s) -> uint32_t { - uint32_t index = 0; - do { - s[index++] = '0' + (value % 10); - value /= 10; - } while (value != 0); - - for (uint32_t i = 0; i < index / 2; ++i) { - char temp = s[i]; - s[i] = s[index - i - 1]; - s[index - i - 1] = temp; - } - s[index] = ','; - return index + 1; - }; - for (int i = 0; i < 65536; ++i) { - size_t offset = const_int_to_str(i, p); - p += offset; - off[i + 1] = off[i] + offset; - } - return {str, off}; -} - -const std::string_view GetCode(uint16_t index) { - // We use about 644254 bytes of memory. An array of 65536 strings might use - // 2097152 bytes so we save 3x the memory. - static auto [backing_string, offsets] = precompute_string(); - return std::string_view(&backing_string[offsets[index]], - offsets[index + 1] - offsets[index]); -} - -#ifdef NODE_JS2C_USE_STRING_LITERALS -const char* string_literal_def_template = "static const %s *%s_raw = "; -constexpr std::string_view latin1_string_literal_start = - "reinterpret_cast(\""; -constexpr std::string_view ascii_string_literal_start = - "reinterpret_cast(R\"JS2C1b732aee("; -constexpr std::string_view utf16_string_literal_start = - "reinterpret_cast(uR\"JS2C1b732aee("; -constexpr std::string_view latin1_string_literal_end = "\");"; -constexpr std::string_view utf_string_literal_end = ")JS2C1b732aee\");"; -#else -const char* array_literal_def_template = "static const %s %s_raw[] = "; -constexpr std::string_view array_literal_start = "{\n"; -constexpr std::string_view array_literal_end = "\n};\n\n"; -#endif - -// Definitions: -// static const uint8_t fs_raw[] = { -// .... -// }; -// -// static StaticExternalOneByteResource fs_resource(fs_raw, 1234, nullptr); -// -// static const uint16_t internal_cli_table_raw[] = { -// .... -// }; -// -// static StaticExternalTwoByteResource -// internal_cli_table_resource(internal_cli_table_raw, 1234, nullptr); -// -// If NODE_JS2C_USE_STRING_LITERALS is defined, the data is output as C++ -// raw strings (i.e. R"JS2C1b732aee(...)JS2C1b732aee") rather than as an -// array. This speeds up compilation for gcc/clang. -enum class CodeType { - kAscii, // Code points are all within 0-127 - kLatin1, // Code points are all within 0-255 - kTwoByte, -}; -template -Fragment GetDefinitionImpl(const std::vector& code, - const std::string& var, - CodeType type) { - constexpr bool is_two_byte = std::is_same_v; - static_assert(is_two_byte || std::is_same_v); - - size_t count = is_two_byte - ? simdutf::utf16_length_from_utf8(code.data(), code.size()) - : code.size(); - constexpr const char* arr_type = is_two_byte ? "uint16_t" : "uint8_t"; - constexpr const char* resource_type = is_two_byte - ? "StaticExternalTwoByteResource" - : "StaticExternalOneByteResource"; - -#ifdef NODE_JS2C_USE_STRING_LITERALS - const char* literal_def_template = string_literal_def_template; - // For code that contains Latin-1 characters, be conservative and assume - // they all need escaping: one "\" and three digits. - size_t unit = type == CodeType::kLatin1 ? 4 : 1; - size_t def_size = 512 + code.size() * unit; -#else - const char* literal_def_template = array_literal_def_template; - constexpr size_t unit = - (is_two_byte ? 5 : 3) + 1; // 0-65536 or 0-255 and a "," - size_t def_size = 512 + count * unit; -#endif - - Fragment result(def_size, 0); - - int cur = snprintf( - result.data(), def_size, literal_def_template, arr_type, var.c_str()); - - assert(cur != 0); - -#ifdef NODE_JS2C_USE_STRING_LITERALS - std::string_view start_string_view; - switch (type) { - case CodeType::kAscii: - start_string_view = ascii_string_literal_start; - break; - case CodeType::kLatin1: - start_string_view = latin1_string_literal_start; - break; - case CodeType::kTwoByte: - start_string_view = utf16_string_literal_start; - break; - } - - memcpy( - result.data() + cur, start_string_view.data(), start_string_view.size()); - cur += start_string_view.size(); - - if (type != CodeType::kLatin1) { - memcpy(result.data() + cur, code.data(), code.size()); - cur += code.size(); - } else { - const uint8_t* ptr = reinterpret_cast(code.data()); - for (size_t i = 0; i < count; ++i) { - // Avoid using snprintf on large chunks of data because it's much slower. - // It's fine to use it on small amount of data though. - uint8_t ch = ptr[i]; - if (ch > 127) { - Debug("In %s, found non-ASCII Latin-1 character at %zu: %d\n", - var.c_str(), - i, - ch); - } - const std::string& str = GetOctalCode(ch); - memcpy(result.data() + cur, str.c_str(), str.size()); - cur += str.size(); - } - } - - std::string_view string_literal_end; - switch (type) { - case CodeType::kAscii: - string_literal_end = utf_string_literal_end; - break; - case CodeType::kLatin1: - string_literal_end = latin1_string_literal_end; - break; - case CodeType::kTwoByte: - string_literal_end = utf_string_literal_end; - break; - } - memcpy(result.data() + cur, - string_literal_end.data(), - string_literal_end.size()); - cur += string_literal_end.size(); -#else - memcpy(result.data() + cur, - array_literal_start.data(), - array_literal_start.size()); - cur += array_literal_start.size(); - - // Avoid using snprintf on large chunks of data because it's much slower. - // It's fine to use it on small amount of data though. - if constexpr (is_two_byte) { - std::vector utf16_codepoints(count); - size_t utf16_count = simdutf::convert_utf8_to_utf16( - code.data(), - code.size(), - reinterpret_cast(utf16_codepoints.data())); - assert(utf16_count != 0); - utf16_codepoints.resize(utf16_count); - Debug("static size %zu\n", utf16_count); - for (size_t i = 0; i < utf16_count; ++i) { - std::string_view str = GetCode(utf16_codepoints[i]); - memcpy(result.data() + cur, str.data(), str.size()); - cur += str.size(); - } - } else { - const uint8_t* ptr = reinterpret_cast(code.data()); - for (size_t i = 0; i < count; ++i) { - uint16_t ch = static_cast(ptr[i]); - if (ch > 127) { - Debug("In %s, found non-ASCII Latin-1 character at %zu: %d\n", - var.c_str(), - i, - ch); - } - std::string_view str = GetCode(ch); - memcpy(result.data() + cur, str.data(), str.size()); - cur += str.size(); - } - } - - memcpy( - result.data() + cur, array_literal_end.data(), array_literal_end.size()); - cur += array_literal_end.size(); -#endif - - int end_size = snprintf(result.data() + cur, - result.size() - cur, - "static %s %s_resource(%s_raw, %zu, nullptr);\n", - resource_type, - var.c_str(), - var.c_str(), - count); - cur += end_size; - result.resize(cur); - return result; -} - -bool Simplify(const std::vector& code, - const std::string& var, - std::vector* simplified) { - // Allowlist files to avoid false positives. - // TODO(joyeecheung): this could be removed if undici updates itself - // to replace "’" with "'" though we could still keep this skeleton in - // place for future hot fixes that are verified by humans. - if (var != "internal_deps_undici_undici") { - return false; - } - - size_t code_size = code.size(); - simplified->reserve(code_size); - const uint8_t* ptr = reinterpret_cast(code.data()); - size_t simplified_count = 0; - for (size_t i = 0; i < code_size; ++i) { - switch (ptr[i]) { - case 226: { // ’ [ 226, 128, 153 ] -> ' - if (i + 2 < code_size && ptr[i + 1] == 128 && ptr[i + 2] == 153) { - simplified->push_back('\''); - i += 2; - simplified_count++; - break; - } - [[fallthrough]]; - } - default: { - simplified->push_back(code[i]); - break; - } - } - } - - if (simplified_count > 0) { - Debug("Simplified %lu characters, ", simplified_count); - Debug("old size %lu, new size %lu\n", code_size, simplified->size()); - return true; - } - return false; -} - -Fragment GetDefinition(const std::string& var, const std::vector& code) { - Debug("GetDefinition %s, code size %zu\n", var.c_str(), code.size()); - bool is_ascii = simdutf::validate_ascii(code.data(), code.size()); - - if (is_ascii) { - Debug("ASCII-only, static size %zu\n", code.size()); - return GetDefinitionImpl(code, var, CodeType::kAscii); - } - - std::vector latin1(code.size()); - auto result = simdutf::convert_utf8_to_latin1_with_errors( - code.data(), code.size(), latin1.data()); - if (!result.error) { - latin1.resize(result.count); - Debug("Latin-1-only, old size %zu, new size %zu\n", - code.size(), - latin1.size()); - return GetDefinitionImpl(latin1, var, CodeType::kLatin1); - } - - // Since V8 only supports Latin-1 and UTF16 as underlying representation - // we have to encode all files containing two-byte characters as UTF16. - // While some files do need two-byte characters, some just - // unintentionally have them. Replace certain characters that are known - // to have sane one-byte equivalent to save space. - std::vector simplified; - if (Simplify(code, var, &simplified)) { // Changed. - Debug("%s is simplified, re-generate definition\n", var.c_str()); - return GetDefinition(var, simplified); - } - - // Simplification did not turn the code into 1-byte string. Just - // use the original. - return GetDefinitionImpl(code, var, CodeType::kTwoByte); -} - -int AddModule(const std::string& filename, - Fragments* definitions, - Fragments* initializers, - Fragments* registrations) { - Debug("AddModule %s start\n", filename.c_str()); - - int error = 0; - size_t file_size = GetFileSize(filename, &error); - if (error != 0) { - return error; - } - std::vector code = ReadFileSync(filename.c_str(), file_size, &error); - if (error != 0) { - return error; - } - std::string file_id = GetFileId(filename); - std::string var = GetVariableName(file_id); - - definitions->emplace_back(GetDefinition(var, code)); - std::string source_type = builtins::GetBuiltinSourceTypeName( - builtins::GetBuiltinSourceType(file_id, filename)); - // Initializers of the BuiltinSourceMap: - // {"fs", - // BuiltinSource{UnionBytes(&fs_resource), BuiltinSourceType::kFunction}}, - // {"internal/deps/v8/tools/tickprocessor-driver", - // BuiltinSource{UnionBytes(&fs_resource), - // BuiltinSourceType::kSourceTextModule}}, - Fragment& init_buf = initializers->emplace_back(Fragment(512, 0)); - int init_size = snprintf(init_buf.data(), - init_buf.size(), - " {\"%s\"," - " BuiltinSource{" - " \"%s\"," - " UnionBytes(&%s_resource)," - " BuiltinSourceType::%s} },", - file_id.c_str(), - file_id.c_str(), - var.c_str(), - source_type.c_str()); - init_buf.resize(init_size); - - // Registrations: - // registry->Register(&fs_resource); - Fragment& reg_buf = registrations->emplace_back(Fragment(256, 0)); - int reg_size = snprintf(reg_buf.data(), - reg_buf.size(), - " registry->Register(&%s_resource);", - var.c_str()); - reg_buf.resize(reg_size); - return 0; -} - -std::vector ReplaceAll(const std::vector& data, - const std::string& search, - const std::string& replacement) { - auto cur = data.begin(); - auto last = data.begin(); - std::vector result; - result.reserve(data.size()); - while ((cur = std::search(last, data.end(), search.begin(), search.end())) != - data.end()) { - result.insert(result.end(), last, cur); - result.insert(result.end(), - replacement.c_str(), - replacement.c_str() + replacement.size()); - last = cur + search.size(); - } - result.insert(result.end(), last, data.end()); - return result; -} - -std::vector StripComments(const std::vector& input) { - std::vector result; - result.reserve(input.size()); - - auto last_hash = input.cbegin(); - auto line_begin = input.cbegin(); - auto end = input.cend(); - while ((last_hash = std::find(line_begin, end, '#')) != end) { - result.insert(result.end(), line_begin, last_hash); - line_begin = std::find(last_hash, end, '\n'); - if (line_begin != end) { - line_begin += 1; - } - } - result.insert(result.end(), line_begin, end); - return result; -} - -// This is technically unused for our config.gypi, but just porting it here to -// mimic js2c.py. -std::vector JoinMultilineString(const std::vector& input) { - std::vector result; - result.reserve(input.size()); - - auto closing_quote = input.cbegin(); - auto last_inserted = input.cbegin(); - auto end = input.cend(); - std::string search = "'\n"; - while ((closing_quote = std::search( - last_inserted, end, search.begin(), search.end())) != end) { - if (closing_quote != last_inserted) { - result.insert(result.end(), last_inserted, closing_quote - 1); - last_inserted = closing_quote - 1; - } - auto opening_quote = closing_quote + 2; - while (opening_quote != end && isspace(*opening_quote)) { - opening_quote++; - } - if (opening_quote == end) { - break; - } - if (*opening_quote == '\'') { - last_inserted = opening_quote + 1; - } else { - result.insert(result.end(), last_inserted, opening_quote); - last_inserted = opening_quote; - } - } - result.insert(result.end(), last_inserted, end); - return result; -} - -std::vector JSONify(const std::vector& code) { - // 1. Remove string comments - std::vector stripped = StripComments(code); - - // 2. turn pseudo-booleans strings into Booleans - std::vector result1 = ReplaceAll(stripped, R"("true")", "true"); - std::vector result2 = ReplaceAll(result1, R"("false")", "false"); - - return result2; -} - -int AddGypi(const std::string& var, - const std::string& filename, - Fragments* definitions) { - Debug("AddGypi %s start\n", filename.c_str()); - - int error = 0; - size_t file_size = GetFileSize(filename, &error); - if (error != 0) { - return error; - } - std::vector code = ReadFileSync(filename.c_str(), file_size, &error); - if (error != 0) { - return error; - } - assert(var == "config"); - - std::vector transformed = JSONify(code); - definitions->emplace_back(GetDefinition(var, transformed)); - return 0; -} - -int JS2C(const FileList& js_files, - const FileList& mjs_files, - const std::string& config, - const std::string& dest) { - Fragments definitions; - definitions.reserve(js_files.size() + mjs_files.size() + 1); - Fragments initializers; - initializers.reserve(js_files.size() + mjs_files.size()); - Fragments registrations; - registrations.reserve(js_files.size() + mjs_files.size() + 1); - - for (const auto& filename : js_files) { - int r = AddModule(filename, &definitions, &initializers, ®istrations); - if (r != 0) { - return r; - } - } - for (const auto& filename : mjs_files) { - int r = AddModule(filename, &definitions, &initializers, ®istrations); - if (r != 0) { - return r; - } - } - - assert(FilenameIsConfigGypi(config)); - // "config.gypi" -> config_raw. - int r = AddGypi("config", config, &definitions); - if (r != 0) { - return r; - } - Fragment out = Format(definitions, initializers, registrations); - return WriteIfChanged(out, dest); -} - -int PrintUsage(const char* argv0) { - fprintf(stderr, - "Usage: %s [--verbose] [--root /path/to/project/root] " - "path/to/output.cc path/to/directory " - "[extra-files ...]\n", - argv0); - return 1; -} - -int Main(int argc, char* argv[]) { - if (argc < 3) { - return PrintUsage(argv[0]); - } - - std::vector args; - args.reserve(argc); - std::string root_dir; - for (int i = 1; i < argc; ++i) { - std::string arg(argv[i]); - if (arg == "--verbose") { - is_verbose = true; - } else if (arg == "--root") { - if (i == argc - 1) { - fprintf(stderr, "--root must be followed by a path\n"); - return 1; - } - root_dir = argv[++i]; - } else { - args.emplace_back(argv[i]); - } - } - - if (args.size() < 2) { - return PrintUsage(argv[0]); - } - - if (!root_dir.empty()) { - int r = uv_chdir(root_dir.c_str()); - if (r != 0) { - fprintf(stderr, "Cannot switch to the directory specified by --root\n"); - PrintUvError("chdir", root_dir.c_str(), r); - return 1; - } - } - std::string output = args[0]; - - FileMap file_map; - for (size_t i = 1; i < args.size(); ++i) { - int error = 0; - const std::string& file = args[i]; - if (IsDirectory(file, &error)) { - if (!SearchFiles(file, &file_map, kJsSuffix) || - !SearchFiles(file, &file_map, kMjsSuffix)) { - return 1; - } - } else if (error != 0) { - return 1; - } else { // It's a file. - std::string_view extension = HasAllowedExtensions(file); - if (extension.size() != 0) { - auto it = file_map.insert({std::string(extension), FileList()}).first; - it->second.push_back(file); - } else { - fprintf(stderr, "Unsupported file: %s\n", file.c_str()); - return 1; - } - } - } - - // Should have exactly 3 types: `.js`, `.mjs` and `.gypi`. - assert(file_map.size() == 3); - auto gypi_it = file_map.find(".gypi"); - // Currently config.gypi is the only `.gypi` file allowed - if (gypi_it == file_map.end() || gypi_it->second.size() != 1 || - !FilenameIsConfigGypi(gypi_it->second[0])) { - fprintf( - stderr, - "Arguments should contain one and only one .gypi file: config.gypi\n"); - return 1; - } - auto js_it = file_map.find(".js"); - auto mjs_it = file_map.find(".mjs"); - assert(js_it != file_map.end() && mjs_it != file_map.end()); - - auto it = std::find(mjs_it->second.begin(), - mjs_it->second.end(), - "lib/eslint.config_partial.mjs"); - if (it != mjs_it->second.end()) { - mjs_it->second.erase(it); - } - - std::sort(js_it->second.begin(), js_it->second.end()); - std::sort(mjs_it->second.begin(), mjs_it->second.end()); - - return JS2C(js_it->second, mjs_it->second, gypi_it->second[0], output); -} -} // namespace js2c -} // namespace node - -NODE_MAIN(int argc, node::argv_type raw_argv[]) { - char** argv; - node::FixupMain(argc, raw_argv, &argv); - return node::js2c::Main(argc, argv); -} diff --git a/tools/js2c.rs b/tools/js2c.rs new file mode 100644 index 00000000000000..c7c4de931b9478 --- /dev/null +++ b/tools/js2c.rs @@ -0,0 +1,720 @@ +use std::collections::BTreeMap; +use std::fs; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; + +const TEMPLATE: &str = r#" +#include "env-inl.h" +#include "node_builtins.h" +#include "node_external_reference.h" +#include "node_internals.h" + +namespace node { + +namespace builtins { + +{definitions} +namespace { +const ThreadsafeCopyOnWrite global_source_map { + BuiltinSourceMap { +{initializers} + } // BuiltinSourceMap +}; // ThreadsafeCopyOnWrite +} // anonymous namespace + +void BuiltinLoader::LoadJavaScriptSource() { + source_ = global_source_map; +} + +void RegisterExternalReferencesForInternalizedBuiltinCode( + ExternalReferenceRegistry* registry) { +{registrations} +} + +UnionBytes BuiltinLoader::GetConfig() { + return UnionBytes(&config_resource); +} + +} // namespace builtins + +} // namespace node +"#; + +const LATIN1_STRING_LITERAL_START: &str = "reinterpret_cast(\""; +const ASCII_STRING_LITERAL_START: &str = + "reinterpret_cast(R\"JS2C1b732aee("; +const UTF16_STRING_LITERAL_START: &str = + "reinterpret_cast(uR\"JS2C1b732aee("; +const LATIN1_STRING_LITERAL_END: &str = "\");"; +const UTF_STRING_LITERAL_END: &str = ")JS2C1b732aee\");"; +const ARRAY_LITERAL_START: &str = "{\n"; +const ARRAY_LITERAL_END: &str = "\n};\n\n"; + +const MJS_SUFFIX: &str = ".mjs"; +const JS_SUFFIX: &str = ".js"; +const GYPI_SUFFIX: &str = ".gypi"; +const DEPS_PREFIX: &str = "deps/"; +const LIB_PREFIX: &str = "lib/"; + +#[cfg(node_js2c_use_string_literals)] +const USE_STRING_LITERALS: bool = true; +#[cfg(not(node_js2c_use_string_literals))] +const USE_STRING_LITERALS: bool = false; + +#[derive(Clone, Copy)] +enum CodeType { + Ascii, + Latin1, + TwoByte, +} + +#[derive(Clone, Copy)] +enum BuiltinSourceType { + BootstrapRealm, + BootstrapScript, + PerContextScript, + MainScript, + Function, + SourceTextModule, +} + +impl BuiltinSourceType { + fn as_cpp_name(self) -> &'static str { + match self { + Self::BootstrapRealm => "kBootstrapRealm", + Self::BootstrapScript => "kBootstrapScript", + Self::PerContextScript => "kPerContextScript", + Self::MainScript => "kMainScript", + Self::Function => "kFunction", + Self::SourceTextModule => "kSourceTextModule", + } + } +} + +struct Context { + verbose: bool, +} + +impl Context { + fn debug(&self, args: std::fmt::Arguments<'_>) { + if self.verbose { + let _ = io::stderr().write_fmt(args); + } + } +} + +macro_rules! debug { + ($ctx:expr, $($arg:tt)*) => { + $ctx.debug(format_args!($($arg)*)) + }; +} + +fn print_error(message: &str) { + let _ = writeln!(io::stderr(), "{message}"); +} + +fn filename_is_config_gypi(path: &str) -> bool { + path == "config.gypi" || path.ends_with("/config.gypi") +} + +fn has_allowed_extension(filename: &str) -> Option<&'static str> { + if filename.ends_with(GYPI_SUFFIX) { + Some(GYPI_SUFFIX) + } else if filename.ends_with(JS_SUFFIX) { + Some(JS_SUFFIX) + } else if filename.ends_with(MJS_SUFFIX) { + Some(MJS_SUFFIX) + } else { + None + } +} + +fn search_files( + dir: &Path, + js_files: &mut Vec, + mjs_files: &mut Vec, +) -> io::Result<()> { + for entry in fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + if path.is_dir() { + search_files(&path, js_files, mjs_files)?; + continue; + } + + let path_str = path_to_posix_string(&path)?; + if path_str.ends_with(JS_SUFFIX) { + js_files.push(path_str); + } else if path_str.ends_with(MJS_SUFFIX) { + mjs_files.push(path_str); + } + } + + Ok(()) +} + +fn path_to_posix_string(path: &Path) -> io::Result { + let value = path + .to_str() + .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "non-utf8 path"))?; + Ok(value.replace('\\', "/")) +} + +fn read_file(path: &str, ctx: &Context) -> io::Result> { + debug!(ctx, "ReadFileSync {path}\n"); + fs::read(path) +} + +fn write_if_changed(out: &[u8], dest: &str, ctx: &Context) -> io::Result<()> { + debug!(ctx, "output size {}\n", out.len()); + + let changed = match fs::read(dest) { + Ok(existing) => { + debug!(ctx, "existing size {}\n", existing.len()); + existing != out + } + Err(err) if err.kind() == io::ErrorKind::NotFound => { + debug!(ctx, "existing size 0\n"); + true + } + Err(err) => return Err(err), + }; + + if !changed { + debug!(ctx, "No change, return\n"); + return Ok(()); + } + + debug!(ctx, "WriteFileSync {} bytes to {dest}\n", out.len()); + fs::write(dest, out) +} + +fn get_file_id(filename: &str) -> String { + let mut end = filename.len(); + let mut start = 0; + let mut prefix = ""; + + // Match the builtin IDs expected by Node's builtin loader by removing the + // source extension and rewriting the lib/ and deps/ roots. + if filename.ends_with(MJS_SUFFIX) { + end -= MJS_SUFFIX.len(); + } else if filename.ends_with(JS_SUFFIX) { + end -= JS_SUFFIX.len(); + } + + if filename.starts_with(DEPS_PREFIX) { + start = DEPS_PREFIX.len(); + prefix = "internal/deps/"; + } else if filename.starts_with(LIB_PREFIX) { + start = LIB_PREFIX.len(); + } + + format!("{prefix}{}", &filename[start..end]) +} + +fn get_variable_name(id: &str) -> String { + id.chars() + .map(|ch| match ch { + '.' | '-' | '/' => '_', + other => other, + }) + .collect() +} + +fn append_octal_code(out: &mut String, ch: u8) { + // Keep simple printable bytes readable in generated literals, but force + // escapes for bytes that could terminate or alter the C++ string literal. + if (b' '..=b'~').contains(&ch) && ch != b'\\' && ch != b'"' && ch != b'?' { + out.push(char::from(ch)); + return; + } + + out.push('\\'); + out.push(char::from(b'0' + ((ch >> 6) & 7))); + out.push(char::from(b'0' + ((ch >> 3) & 7))); + out.push(char::from(b'0' + (ch & 7))); +} + +fn append_decimal(out: &mut String, mut value: T) +where + T: Copy + Default + Eq + From + std::ops::DivAssign + std::ops::Rem, +{ + if value == T::default() { + out.push('0'); + return; + } + + let mut buf = [0u8; 20]; + let mut index = buf.len(); + while value != T::default() { + let digit = value % T::from(10); + value /= T::from(10); + index -= 1; + buf[index] = b'0' + digit_to_u8(digit); + } + for &digit in &buf[index..] { + out.push(char::from(digit)); + } +} + +fn digit_to_u8(value: T) -> u8 +where + T: Copy + Eq + From, +{ + let mut digit = 0u8; + while T::from(digit) != value { + digit += 1; + } + digit +} + +fn utf8_to_latin1(code: &[u8]) -> Option> { + let text = std::str::from_utf8(code).ok()?; + let mut out = Vec::with_capacity(text.len()); + for ch in text.chars() { + let value = ch as u32; + if value > 0xFF { + return None; + } + out.push(value as u8); + } + Some(out) +} + +fn simplify(code: &[u8], var: &str, ctx: &Context) -> Option> { + if var != "internal_deps_undici_undici" { + return None; + } + + // Keep this hot-fix allowlisted so we do not silently rewrite arbitrary + // builtins while trying to save space in the embedded output. + let mut simplified = Vec::with_capacity(code.len()); + let mut count = 0usize; + let mut i = 0usize; + while i < code.len() { + if i + 2 < code.len() && code[i] == 226 && code[i + 1] == 128 && code[i + 2] == 153 { + simplified.push(b'\''); + count += 1; + i += 3; + continue; + } + + simplified.push(code[i]); + i += 1; + } + + if count > 0 { + debug!( + ctx, + "Simplified {count} characters, old size {}, new size {}\n", + code.len(), + simplified.len() + ); + Some(simplified) + } else { + None + } +} + +fn get_builtin_source_type(id: &str, filename: &str) -> BuiltinSourceType { + if filename.ends_with(MJS_SUFFIX) { + return BuiltinSourceType::SourceTextModule; + } + if id.starts_with("internal/bootstrap/realm") { + return BuiltinSourceType::BootstrapRealm; + } + if id.starts_with("internal/bootstrap/") { + return BuiltinSourceType::BootstrapScript; + } + if id.starts_with("internal/per_context/") { + return BuiltinSourceType::PerContextScript; + } + if id.starts_with("internal/main/") { + return BuiltinSourceType::MainScript; + } + if id.starts_with("internal/deps/v8/tools/") { + return BuiltinSourceType::SourceTextModule; + } + BuiltinSourceType::Function +} + +fn get_definition(var: &str, code: &[u8], ctx: &Context) -> io::Result { + debug!(ctx, "GetDefinition {var}, code size {}\n", code.len()); + + if code.iter().all(|byte| *byte < 0x80) { + debug!(ctx, "ASCII-only, static size {}\n", code.len()); + return get_definition_impl(var, code, CodeType::Ascii, ctx); + } + + if let Some(latin1) = utf8_to_latin1(code) { + debug!( + ctx, + "Latin-1-only, old size {}, new size {}\n", + code.len(), + latin1.len() + ); + return get_definition_impl(var, &latin1, CodeType::Latin1, ctx); + } + + if let Some(simplified) = simplify(code, var, ctx) { + debug!(ctx, "{var} is simplified, re-generate definition\n"); + return get_definition(var, &simplified, ctx); + } + + get_definition_impl(var, code, CodeType::TwoByte, ctx) +} + +fn get_definition_impl( + var: &str, + code: &[u8], + code_type: CodeType, + ctx: &Context, +) -> io::Result { + let (arr_type, resource_type) = match code_type { + CodeType::Ascii | CodeType::Latin1 => ("uint8_t", "StaticExternalOneByteResource"), + CodeType::TwoByte => ("uint16_t", "StaticExternalTwoByteResource"), + }; + + let utf8_text = match code_type { + CodeType::Ascii | CodeType::Latin1 => None, + CodeType::TwoByte => Some( + std::str::from_utf8(code) + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "invalid utf-8 source"))?, + ), + }; + let utf16_code = if USE_STRING_LITERALS { + None + } else { + match code_type { + CodeType::Ascii | CodeType::Latin1 => None, + CodeType::TwoByte => Some(utf8_text.expect("checked above").encode_utf16().collect::>()), + } + }; + let count = match code_type { + CodeType::Ascii | CodeType::Latin1 => code.len(), + CodeType::TwoByte => match &utf16_code { + Some(utf16) => utf16.len(), + None => utf8_text.expect("checked above").encode_utf16().count(), + }, + }; + + let mut result = String::with_capacity(128 + code.len() * 4); + if USE_STRING_LITERALS { + result.push_str("static const "); + result.push_str(arr_type); + result.push_str(" *"); + result.push_str(var); + result.push_str("_raw = "); + match code_type { + CodeType::Ascii => { + result.push_str(ASCII_STRING_LITERAL_START); + result.push_str(std::str::from_utf8(code).map_err(|_| { + io::Error::new(io::ErrorKind::InvalidData, "invalid ascii source") + })?); + result.push_str(UTF_STRING_LITERAL_END); + } + CodeType::Latin1 => { + result.push_str(LATIN1_STRING_LITERAL_START); + for (index, ch) in code.iter().copied().enumerate() { + if ch > 127 { + debug!( + ctx, + "In {var}, found non-ASCII Latin-1 character at {index}: {ch}\n" + ); + } + append_octal_code(&mut result, ch); + } + result.push_str(LATIN1_STRING_LITERAL_END); + } + CodeType::TwoByte => { + // Preserve the original UTF-8 source in a UTF-16 raw literal so + // the compiler performs the code unit conversion for us. + result.push_str(UTF16_STRING_LITERAL_START); + result.push_str(utf8_text.expect("checked above")); + result.push_str(UTF_STRING_LITERAL_END); + } + } + } else { + result.push_str("static const "); + result.push_str(arr_type); + result.push(' '); + result.push_str(var); + result.push_str("_raw[] = "); + result.push_str(ARRAY_LITERAL_START); + + match code_type { + CodeType::Ascii | CodeType::Latin1 => { + for (index, ch) in code.iter().copied().enumerate() { + if ch > 127 { + debug!( + ctx, + "In {var}, found non-ASCII Latin-1 character at {index}: {ch}\n" + ); + } + append_decimal(&mut result, ch); + result.push(','); + } + } + CodeType::TwoByte => { + let utf16 = utf16_code.expect("computed above"); + debug!(ctx, "static size {}\n", utf16.len()); + for code_unit in utf16 { + append_decimal(&mut result, code_unit); + result.push(','); + } + } + } + + result.push_str(ARRAY_LITERAL_END); + } + + result.push_str("static "); + result.push_str(resource_type); + result.push(' '); + result.push_str(var); + result.push_str("_resource("); + result.push_str(var); + result.push_str("_raw, "); + append_decimal(&mut result, count); + result.push_str(", nullptr);\n"); + Ok(result) +} + +fn replace_all(data: &[u8], search: &[u8], replacement: &[u8]) -> Vec { + let mut result = Vec::with_capacity(data.len()); + let mut cursor = 0usize; + + while let Some(found) = data[cursor..] + .windows(search.len()) + .position(|window| window == search) + { + let absolute = cursor + found; + result.extend_from_slice(&data[cursor..absolute]); + result.extend_from_slice(replacement); + cursor = absolute + search.len(); + } + + result.extend_from_slice(&data[cursor..]); + result +} + +fn strip_comments(input: &[u8]) -> Vec { + let mut result = Vec::with_capacity(input.len()); + + for chunk in input.split_inclusive(|byte| *byte == b'\n') { + // config.gypi only needs line-based shell-style comments removed before + // it can be embedded as JSON-ish data. + if let Some(pos) = chunk.iter().position(|byte| *byte == b'#') { + result.extend_from_slice(&chunk[..pos]); + if chunk.ends_with(b"\n") { + result.push(b'\n'); + } + } else { + result.extend_from_slice(chunk); + } + } + + result +} + +fn jsonify(code: &[u8]) -> Vec { + // Preserve the existing js2c behavior for config.gypi by removing comment + // lines and unquoting the pseudo-boolean strings it contains. + let stripped = strip_comments(code); + let result1 = replace_all(&stripped, br#""true""#, b"true"); + replace_all(&result1, br#""false""#, b"false") +} + +fn add_module( + filename: &str, + definitions: &mut String, + initializers: &mut String, + registrations: &mut String, + ctx: &Context, +) -> io::Result<()> { + debug!(ctx, "AddModule {filename} start\n"); + let code = read_file(filename, ctx)?; + let file_id = get_file_id(filename); + let var = get_variable_name(&file_id); + + definitions.push_str(&get_definition(&var, &code, ctx)?); + definitions.push('\n'); + let source_type = get_builtin_source_type(&file_id, filename).as_cpp_name(); + initializers.push_str(" {\""); + initializers.push_str(&file_id); + initializers.push_str("\", BuiltinSource{ \""); + initializers.push_str(&file_id); + initializers.push_str("\", UnionBytes(&"); + initializers.push_str(&var); + initializers.push_str("_resource), BuiltinSourceType::"); + initializers.push_str(source_type); + initializers.push_str("} },\n"); + registrations.push_str(" registry->Register(&"); + registrations.push_str(&var); + registrations.push_str("_resource);\n"); + Ok(()) +} + +fn add_gypi( + var: &str, + filename: &str, + definitions: &mut String, + ctx: &Context, +) -> io::Result<()> { + debug!(ctx, "AddGypi {filename} start\n"); + let code = read_file(filename, ctx)?; + let transformed = jsonify(&code); + definitions.push_str(&get_definition(var, &transformed, ctx)?); + definitions.push('\n'); + Ok(()) +} + +fn format_output(definitions: &str, initializers: &str, registrations: &str) -> Vec { + let mut out = String::with_capacity( + TEMPLATE.len() + definitions.len() + initializers.len() + registrations.len(), + ); + let mut rest = TEMPLATE; + for (needle, replacement) in [ + ("{definitions}", definitions), + ("{initializers}", initializers), + ("{registrations}", registrations), + ] { + let (head, tail) = rest + .split_once(needle) + .expect("template placeholder missing"); + out.push_str(head); + out.push_str(replacement); + rest = tail; + } + out.push_str(rest); + out.into_bytes() +} + +fn js2c(js_files: &[String], mjs_files: &[String], config: &str, dest: &str, ctx: &Context) -> io::Result<()> { + let input_count = js_files.len() + mjs_files.len() + 1; + let mut definitions = String::with_capacity(input_count * 256); + let mut initializers = String::with_capacity(input_count * 96); + let mut registrations = String::with_capacity(input_count * 48); + + for filename in js_files { + add_module(filename, &mut definitions, &mut initializers, &mut registrations, ctx)?; + } + for filename in mjs_files { + add_module(filename, &mut definitions, &mut initializers, &mut registrations, ctx)?; + } + + add_gypi("config", config, &mut definitions, ctx)?; + let out = format_output(&definitions, &initializers, ®istrations); + write_if_changed(&out, dest, ctx) +} + +fn print_usage(argv0: &str) -> i32 { + print_error(&format!( + "Usage: {argv0} [--verbose] [--root /path/to/project/root] path/to/output.cc path/to/directory [extra-files ...]" + )); + 1 +} + +fn main_impl() -> Result<(), i32> { + let mut raw_args = std::env::args(); + let argv0 = raw_args.next().unwrap_or_else(|| "js2c".to_string()); + + let mut args = Vec::new(); + let mut root_dir: Option = None; + let mut verbose = false; + + while let Some(arg) = raw_args.next() { + match arg.as_str() { + "--verbose" => verbose = true, + "--root" => { + let Some(path) = raw_args.next() else { + print_error("--root must be followed by a path"); + return Err(1); + }; + root_dir = Some(path); + } + _ => args.push(arg), + } + } + + if args.len() < 2 { + return Err(print_usage(&argv0)); + } + + if let Some(root_dir) = root_dir { + if let Err(err) = std::env::set_current_dir(&root_dir) { + print_error("Cannot switch to the directory specified by --root"); + print_error(&format!("chdir {root_dir}: {err}")); + return Err(1); + } + } + + let ctx = Context { verbose }; + let output = args[0].clone(); + let mut file_map: BTreeMap> = BTreeMap::new(); + + for file in &args[1..] { + let path = PathBuf::from(file); + if path.is_dir() { + let js_files = file_map.entry(JS_SUFFIX.to_string()).or_default(); + let mut found_js = std::mem::take(js_files); + let mjs_files = file_map.entry(MJS_SUFFIX.to_string()).or_default(); + let mut found_mjs = std::mem::take(mjs_files); + if let Err(err) = search_files(&path, &mut found_js, &mut found_mjs) { + print_error(&format!("scandir {}: {err}", path.display())); + return Err(1); + } + file_map.insert(JS_SUFFIX.to_string(), found_js); + file_map.insert(MJS_SUFFIX.to_string(), found_mjs); + continue; + } + + if !path.exists() { + print_error(&format!("Unsupported or missing file: {file}")); + return Err(1); + } + + match has_allowed_extension(file) { + Some(extension) => file_map.entry(extension.to_string()).or_default().push(file.clone()), + None => { + print_error(&format!("Unsupported file: {file}")); + return Err(1); + } + } + } + + let Some(gypi_files) = file_map.get(GYPI_SUFFIX) else { + print_error("Arguments should contain one and only one .gypi file: config.gypi"); + return Err(1); + }; + if gypi_files.len() != 1 || !filename_is_config_gypi(&gypi_files[0]) { + print_error("Arguments should contain one and only one .gypi file: config.gypi"); + return Err(1); + } + let config = gypi_files[0].clone(); + + let mut js_files = file_map.remove(JS_SUFFIX).unwrap_or_default(); + let mut mjs_files = file_map.remove(MJS_SUFFIX).unwrap_or_default(); + + if let Some(index) = mjs_files + .iter() + .position(|file| file == "lib/eslint.config_partial.mjs") + { + mjs_files.remove(index); + } + + js_files.sort(); + mjs_files.sort(); + + js2c(&js_files, &mjs_files, &config, &output, &ctx).map_err(|err| { + print_error(&format!("{err}")); + 1 + }) +} + +fn main() { + std::process::exit(match main_impl() { + Ok(()) => 0, + Err(code) => code, + }); +} diff --git a/unofficial.gni b/unofficial.gni index aa78f9ce60c043..7efe48132a52e6 100644 --- a/unofficial.gni +++ b/unofficial.gni @@ -314,20 +314,47 @@ template("node_gn_build") { ] } - executable("node_js2c") { - deps = [ - "deps/uv", - "$node_simdutf_path", + if (host_os == "win") { + host_executable_suffix = ".exe" + } else { + host_executable_suffix = "" + } + node_js2c_rustc_args = [ + "--edition=2021", + "--crate-name", + "node_js2c", + ] + if (is_debug) { + node_js2c_rustc_args += [ + "-C", + "debuginfo=2", + "-C", + "opt-level=0", ] - sources = [ - "tools/js2c.cc", - "tools/executable_wrapper.h", - "src/embedded_data.cc", - "src/embedded_data.h", - "src/builtin_info.cc", - "src/builtin_info.h", + } else { + node_js2c_rustc_args += [ + "-C", + "opt-level=2", + ] + } + if (host_os == "linux" || host_os == "mac" || host_os == "openharmony") { + node_js2c_rustc_args += [ + "--cfg", + "node_js2c_use_string_literals", ] - include_dirs = [ "src" ] + } + + action("node_js2c") { + script = "$node_v8_path/tools/run.py" + inputs = [ "tools/js2c.rs" ] + outputs = [ "$root_out_dir/node_js2c$host_executable_suffix" ] + args = [ "rustc" ] + node_js2c_rustc_args + + [ + rebase_path("tools/js2c.rs", root_build_dir), + "-o", + rebase_path("$root_out_dir/node_js2c$host_executable_suffix", + root_build_dir), + ] } action("run_node_js2c") { @@ -349,16 +376,9 @@ template("node_gn_build") { [ "$target_gen_dir/config.gypi" ] outputs = [ "$target_gen_dir/node_javascript.cc" ] - # Get the path to node_js2c executable of the host toolchain. - if (host_os == "win") { - host_executable_suffix = ".exe" - } else { - host_executable_suffix = "" - } node_js2c_path = - get_label_info(":node_js2c($host_toolchain)", "root_out_dir") + "/" + - get_label_info(":node_js2c($host_toolchain)", "name") + - host_executable_suffix + get_label_info(":node_js2c($host_toolchain)", "root_out_dir") + + "/node_js2c" + host_executable_suffix args = [ rebase_path(node_js2c_path), rebase_path("$target_gen_dir/node_javascript.cc"),