// tools/js2c.cc - external/github.com/v8/node - Git at Google

 #include <algorithm>
 #include <array>
 #include <cassert>
 #include <cctype>
 #include <cstdarg>
 #include <cstdio>
 #include <functional>
 #include <map>
 #include <string>
 #include <string_view>
 #include <vector>
 #include "builtin_info.h"
 #include "embedded_data.h"
 #include "executable_wrapper.h"
 #include "simdutf.h"
 #include "uv.h"

 #if defined(_WIN32)
 #include <io.h>  // _S_IREAD _S_IWRITE
 #ifndef S_IRUSR
 #define S_IRUSR _S_IREAD
 #endif  // S_IRUSR
 #ifndef S_IWUSR
 #define S_IWUSR _S_IWRITE
 #endif  // S_IWUSR
 #endif
 namespace node {
 namespace js2c {
 // Forward declaration of the tool's entry point; the definition is further
 // down in this namespace.
 int Main(int argc, char* argv[]);

 // When true, Debug() prints its messages to stderr. Main() turns this on
 // when it sees the --verbose flag.
 static bool is_verbose = false;

 // Writes a printf-style diagnostic to stderr, but only while verbose mode
 // is enabled; otherwise the call does nothing.
 void Debug(const char* format, ...) {
   if (!is_verbose) {
     return;
   }
   va_list args;
   va_start(args, format);
   vfprintf(stderr, format, args);
   va_end(args);
 }

 void PrintUvError(const char* syscall, const char* filename, int error) {
   fprintf(stderr, "[%s] %s: %s\n", syscall, filename, uv_strerror(error));
 }

 int GetStats(const char* path, std::function<void(const uv_stat_t*)> func) {
   uv_fs_t req;
   int r = uv_fs_stat(nullptr, &req, path, nullptr);
   if (r == 0) {
     func(static_cast<const uv_stat_t*>(req.ptr));
   }
   uv_fs_req_cleanup(&req);
   return r;
 }

 // Returns true when `filename` names a directory.
 // `*error` receives the result of the underlying stat call (0 on success).
 // When the stat fails, the error is printed to stderr and false is returned.
 bool IsDirectory(const std::string& filename, int* error) {
   bool result = false;
   *error = GetStats(filename.c_str(), [&](const uv_stat_t* stats) {
     // Compare the whole file-type field instead of testing a single bit:
     // the S_IFDIR bit is also set in the type codes of other file kinds
     // (e.g. block devices and sockets on POSIX), which a plain
     // `st_mode & S_IFDIR` would misreport as directories.
     result = (stats->st_mode & S_IFMT) == S_IFDIR;
   });
   if (*error != 0) {
     PrintUvError("stat", filename.c_str(), *error);
   }
   return result;
 }

 size_t GetFileSize(const std::string& filename, int* error) {
   size_t result = 0;
   *error = GetStats(filename.c_str(),
                     [&](const uv_stat_t* stats) { result = stats->st_size; });
   return result;
 }

 // True if `path` is exactly "config.gypi" or its last '/'-separated
 // component is "config.gypi".
 constexpr bool FilenameIsConfigGypi(const std::string_view path) {
   const size_t last_slash = path.rfind('/');
   const std::string_view basename = last_slash == std::string_view::npos
                                         ? path
                                         : path.substr(last_slash + 1);
   return basename == "config.gypi";
 }

 // FileList is a list of file paths; FileMap groups such lists under a
 // string key (the file extension, e.g. ".js", in this tool).
 using FileList = std::vector<std::string>;
 using FileMap = std::map<std::string, FileList>;

 bool SearchFiles(const std::string& dir,
                  FileMap* file_map,
                  std::string_view extension) {
   uv_fs_t scan_req;
   int result = uv_fs_scandir(nullptr, &scan_req, dir.c_str(), 0, nullptr);
   bool errored = false;
   if (result < 0) {
     PrintUvError("scandir", dir.c_str(), result);
     errored = true;
   } else {
     auto it = file_map->insert({std::string(extension), FileList()}).first;
     FileList& files = it->second;
     files.reserve(files.size() + result);
     uv_dirent_t dent;
     while (true) {
       result = uv_fs_scandir_next(&scan_req, &dent);
       if (result == UV_EOF) {
         break;
       }

       if (result != 0) {
         PrintUvError("scandir_next", dir.c_str(), result);
         errored = true;
         break;
       }

       std::string path = dir + '/' + dent.name;
       if (path.ends_with(extension)) {
         files.emplace_back(path);
         continue;
       }
       if (!IsDirectory(path, &result)) {
         if (result == 0) {  // It's a file, no need to search further.
           continue;
         } else {
           errored = true;
           break;
         }
       }

       if (!SearchFiles(path, file_map, extension)) {
         errored = true;
         break;
       }
     }
   }

   uv_fs_req_cleanup(&scan_req);
   return !errored;
 }

 // File-name suffixes this tool recognises (see HasAllowedExtensions()).
 constexpr std::string_view kMjsSuffix = ".mjs";
 constexpr std::string_view kJsSuffix = ".js";
 constexpr std::string_view kGypiSuffix = ".gypi";
 // Leading path components that GetFileId() strips when turning a file path
 // into a builtin id.
 constexpr std::string_view depsPrefix = "deps/";
 constexpr std::string_view libPrefix = "lib/";

 // Returns the recognised suffix (".gypi", ".js" or ".mjs", checked in that
 // order) that `filename` ends with, or an empty view when it has none.
 constexpr std::string_view HasAllowedExtensions(
     const std::string_view filename) {
   if (filename.ends_with(kGypiSuffix)) {
     return kGypiSuffix;
   }
   if (filename.ends_with(kJsSuffix)) {
     return kJsSuffix;
   }
   if (filename.ends_with(kMjsSuffix)) {
     return kMjsSuffix;
   }
   return std::string_view();
 }

 // A Fragment is one chunk of generated text; Fragments is a list of chunks.
 using Fragment = std::vector<char>;
 using Fragments = std::vector<Fragment>;

 // Concatenates `fragments`, inserting `separator` between consecutive
 // entries (never before the first or after the last).
 // An empty list yields an empty buffer.
 std::vector<char> Join(const Fragments& fragments,
                        const std::string& separator) {
   std::vector<char> buf;
   if (fragments.empty()) {
     // Nothing to join. Returning early also keeps `fragments.size() - 1`
     // below from wrapping around to a huge size_t value.
     return buf;
   }

   size_t length = separator.size() * (fragments.size() - 1);
   for (const Fragment& fragment : fragments) {
     length += fragment.size();
   }
   // Reserve once so the appends below never reallocate.
   buf.reserve(length);

   for (size_t i = 0; i < fragments.size(); ++i) {
     if (i != 0) {
       buf.insert(buf.end(), separator.begin(), separator.end());
     }
     buf.insert(buf.end(), fragments[i].begin(), fragments[i].end());
   }
   return buf;
 }

 // printf-style skeleton of the generated C++ source file. Format() fills
 // the three "%.*s" slots, in order, with the joined resource definitions,
 // the BuiltinSourceMap initializers and the external-reference
 // registrations.
 const char* kTemplate = R"(
 #include "env-inl.h"
 #include "node_builtins.h"
 #include "node_external_reference.h"
 #include "node_internals.h"

 namespace node {

 namespace builtins {

 %.*s
 namespace {
 const ThreadsafeCopyOnWrite<BuiltinSourceMap> global_source_map {
   BuiltinSourceMap {
 %.*s
   }  // BuiltinSourceMap
 };  // ThreadsafeCopyOnWrite
 }  // anonymous namespace

 void BuiltinLoader::LoadJavaScriptSource() {
   source_ = global_source_map;
 }

 void RegisterExternalReferencesForInternalizedBuiltinCode(
   ExternalReferenceRegistry* registry) {
 %.*s
 }

 UnionBytes BuiltinLoader::GetConfig() {
   return UnionBytes(&config_resource);
 }

 }  // namespace builtins

 }  // namespace node
 )";

 // Produces the text of the generated source file: each list is joined with
 // newlines and the three results are substituted into kTemplate.
 Fragment Format(const Fragments& definitions,
                 const Fragments& initializers,
                 const Fragments& registrations) {
   const std::vector<char> defs = Join(definitions, "\n");
   const std::vector<char> inits = Join(initializers, "\n");
   const std::vector<char> regs = Join(registrations, "\n");

   // Room for the three payloads plus the template text and some slack.
   const size_t capacity =
       defs.size() + inits.size() + regs.size() + strlen(kTemplate) + 100;
   Fragment output(capacity, 0);
   // The buffers are not NUL-terminated, hence the explicit "%.*s" lengths.
   const int written = snprintf(output.data(),
                                capacity,
                                kTemplate,
                                static_cast<int>(defs.size()),
                                defs.data(),
                                static_cast<int>(inits.size()),
                                inits.data(),
                                static_cast<int>(regs.size()),
                                regs.data());
   output.resize(written);
   return output;
 }

 // Synchronously reads `size` bytes from the file at `path`.
 //
 // On success the contents are returned and `*error` holds the result of
 // closing the file. If opening or reading fails, an empty vector is
 // returned and `*error` holds the negative libuv error code.
 std::vector<char> ReadFileSync(const char* path, size_t size, int* error) {
   uv_fs_t req;
   Debug("ReadFileSync %s with size %zu\n", path, size);

   uv_file file = uv_fs_open(nullptr, &req, path, O_RDONLY, 0, nullptr);
   // Take the result out of the request before releasing it.
   const int open_result = static_cast<int>(req.result);
   uv_fs_req_cleanup(&req);
   if (open_result < 0) {
     *error = open_result;
     return std::vector<char>();
   }

   std::vector<char> contents(size);
   size_t offset = 0;

   while (offset < size) {
     uv_buf_t buf = uv_buf_init(contents.data() + offset, size - offset);
     int bytes_read = uv_fs_read(nullptr, &req, file, &buf, 1, offset, nullptr);
     *error = req.result;
     uv_fs_req_cleanup(&req);
     if (*error < 0) {
       uv_fs_close(nullptr, &req, file, nullptr);
       uv_fs_req_cleanup(&req);
       // We can't do anything if uv_fs_close returns error, so just return.
       return std::vector<char>();
     }
     if (bytes_read <= 0) {
       break;
     }
     // Only advance once the read is known to have succeeded, so a negative
     // result can never be added to the unsigned offset.
     offset += bytes_read;
   }
   assert(offset == size);

   *error = uv_fs_close(nullptr, &req, file, nullptr);
   uv_fs_req_cleanup(&req);
   return contents;
 }

 int WriteFileSync(const std::vector<char>& out, const char* path) {
   Debug("WriteFileSync %zu bytes to %s\n", out.size(), path);
   uv_fs_t req;
   uv_file file = uv_fs_open(nullptr,
                             &req,
                             path,
                             UV_FS_O_CREAT | UV_FS_O_WRONLY | UV_FS_O_TRUNC,
                             S_IWUSR | S_IRUSR,
                             nullptr);
   int err = req.result;
   uv_fs_req_cleanup(&req);
   if (err < 0) {
     return err;
   }

   uv_buf_t buf = uv_buf_init(const_cast<char*>(out.data()), out.size());
   err = uv_fs_write(nullptr, &req, file, &buf, 1, 0, nullptr);
   uv_fs_req_cleanup(&req);

   int r = uv_fs_close(nullptr, &req, file, nullptr);
   uv_fs_req_cleanup(&req);
   if (err < 0) {
     // We can't do anything if uv_fs_close returns error, so just return.
     return err;
   }
   return r;
 }

 int WriteIfChanged(const Fragment& out, const std::string& dest) {
   Debug("output size %zu\n", out.size());

   int error = 0;
   size_t size = GetFileSize(dest, &error);
   if (error != 0 && error != UV_ENOENT) {
     return error;
   }
   Debug("existing size %zu\n", size);

   bool changed = true;
   // If it's not the same size, the file is definitely changed so we'll
   // just proceed to update. Otherwise check the content before deciding
   // whether we want to write it.
   if (error != UV_ENOENT && size == out.size()) {
     std::vector<char> content = ReadFileSync(dest.c_str(), size, &error);
     if (error == 0) {  // In case of error, always write the file.
       changed = (memcmp(content.data(), out.data(), size) != 0);
     }
   }
   if (!changed) {
     Debug("No change, return\n");
     return 0;
   }
   return WriteFileSync(out, dest.c_str());
 }

 std::string GetFileId(const std::string& filename) {
   size_t end = filename.size();
   size_t start = 0;
   std::string prefix;
   // Strip .mjs and .js suffix
   if (filename.ends_with(kMjsSuffix)) {
     end -= kMjsSuffix.size();
   } else if (filename.ends_with(kJsSuffix)) {
     end -= kJsSuffix.size();
   }

   // deps/acorn/acorn/dist/acorn.js -> internal/deps/acorn/acorn/dist/acorn
   if (filename.starts_with(depsPrefix)) {
     start = depsPrefix.size();
     prefix = "internal/deps/";
   } else if (filename.starts_with(libPrefix)) {
     // lib/internal/url.js -> internal/url
     start = libPrefix.size();
     prefix = "";
   }

   return prefix + std::string(filename.begin() + start, filename.begin() + end);
 }

 // Turns a builtin id into a name usable in a C++ identifier by replacing
 // every '.', '-' and '/' with '_'.
 std::string GetVariableName(const std::string& id) {
   std::string name(id);
   for (char& ch : name) {
     switch (ch) {
       case '.':
       case '-':
       case '/':
         ch = '_';
         break;
       default:
         break;
     }
   }
   return name;
 }

 // Builds the text "0,1,2,3,...,65535," together with a table of offsets
 // into it.
 // Entry i of the offset table is where the substring for i ("0,", "1,",
 // ...) starts; the last entry (index 65536) is one past the end of the
 // text.
 // 382106 is the length of "0,1,2,3,...,65535," and 65537 is 2**16 + 1.
 // This function could be constexpr, but it might become too expensive to
 // compile.
 std::pair<std::array<char, 382106>, std::array<uint32_t, 65537>>
 precompute_string() {
   std::pair<std::array<char, 382106>, std::array<uint32_t, 65537>> table;
   auto& [text, starts] = table;

   uint32_t cursor = 0;
   for (uint32_t value = 0; value < 65536; ++value) {
     starts[value] = cursor;

     // Number of decimal digits needed for `value` (at least one, for 0).
     uint32_t digits = 1;
     for (uint32_t rest = value / 10; rest != 0; rest /= 10) {
       ++digits;
     }

     // Emit the digits from the least significant one backwards, so they
     // land in reading order without a separate reversal step.
     uint32_t rest = value;
     for (uint32_t k = digits; k > 0; --k) {
       text[cursor + k - 1] = static_cast<char>('0' + rest % 10);
       rest /= 10;
     }
     cursor += digits;
     text[cursor] = ',';
     ++cursor;
   }
   starts[65536] = cursor;
   return table;
 }

 // Returns the decimal text of `index` followed by a comma (e.g. "42,"),
 // as a view into a table that is built on first use.
 const std::string_view GetCode(uint16_t index) {
   // We use about 644254 bytes of memory. An array of 65536 strings might use
   // 2097152 bytes so we save 3x the memory.
   static auto [backing_string, offsets] = precompute_string();
   const uint32_t begin = offsets[index];
   const uint32_t length = offsets[index + 1] - begin;
   return std::string_view(&backing_string[begin], length);
 }

 #ifdef NODE_JS2C_USE_STRING_LITERALS
 // Pieces used by GetDefinitionImpl() to emit a source as a C++ string
 // literal. The template takes the element type and the variable name.
 const char* string_literal_def_template = "static const %s *%s_raw = ";
 // Opening text per CodeType: Latin-1 uses an ordinary string literal,
 // ASCII a raw string literal and two-byte code a UTF-16 raw string literal.
 constexpr std::string_view latin1_string_literal_start =
     "reinterpret_cast<const uint8_t*>(\"";
 constexpr std::string_view ascii_string_literal_start =
     "reinterpret_cast<const uint8_t*>(R\"JS2C1b732aee(";
 constexpr std::string_view utf16_string_literal_start =
     "reinterpret_cast<const uint16_t*>(uR\"JS2C1b732aee(";
 // Matching closing text: one for the ordinary literal, one shared by both
 // raw string literal forms.
 constexpr std::string_view latin1_string_literal_end = "\");";
 constexpr std::string_view utf_string_literal_end = ")JS2C1b732aee\");";
 #else
 // Pieces used by GetDefinitionImpl() to emit a source as a brace-enclosed
 // array. The template takes the element type and the variable name.
 const char* array_literal_def_template = "static const %s %s_raw[] = ";
 constexpr std::string_view array_literal_start = "{\n";
 constexpr std::string_view array_literal_end = "\n};\n\n";
 #endif

 // Definitions:
 // static const uint8_t fs_raw[] = {
 //  ....
 // };
 //
 // static StaticExternalOneByteResource fs_resource(fs_raw, 1234, nullptr);
 //
 // static const uint16_t internal_cli_table_raw[] = {
 //  ....
 // };
 //
 // static StaticExternalTwoByteResource
 // internal_cli_table_resource(internal_cli_table_raw, 1234, nullptr);
 //
 // If NODE_JS2C_USE_STRING_LITERALS is defined, the data is output as C++
 // raw strings (i.e. R"JS2C1b732aee(...)JS2C1b732aee") rather than as an
 // array. This speeds up compilation for gcc/clang.

 // How the characters of a source are classified. GetDefinition() picks the
 // value and GetDefinitionImpl() uses it to choose the literal form.
 enum class CodeType {
   kAscii,   // Code points are all within 0-127
   kLatin1,  // Code points are all within 0-255
   kTwoByte,
 };
 // Generates the C++ text that defines one embedded source:
 //   - the `<var>_raw` data (a string literal when
 //     NODE_JS2C_USE_STRING_LITERALS is defined, otherwise an array of
 //     numbers), followed by
 //   - `static <resource type> <var>_resource(<var>_raw, <count>, nullptr);`.
 //
 // T is `char` for one-byte output or `uint16_t` for two-byte output (any
 // other type is rejected at compile time).
 // `code` holds the source bytes; for two-byte output they are treated as
 // UTF-8 and converted to UTF-16.
 // `var` is the identifier prefix and `type` selects the literal form in
 // string-literal mode.
 template <typename T>
 Fragment GetDefinitionImpl(const std::vector<char>& code,
                            const std::string& var,
                            CodeType type) {
   constexpr bool is_two_byte = std::is_same_v<T, uint16_t>;
   static_assert(is_two_byte || std::is_same_v<T, char>);

   // Number of elements the resource reports: UTF-16 code units for two-byte
   // output, bytes otherwise.
   size_t count = is_two_byte
                      ? simdutf::utf16_length_from_utf8(code.data(), code.size())
                      : code.size();
   constexpr const char* arr_type = is_two_byte ? "uint16_t" : "uint8_t";
   constexpr const char* resource_type = is_two_byte
                                             ? "StaticExternalTwoByteResource"
                                             : "StaticExternalOneByteResource";

   // Estimate the output buffer size: 512 bytes for the surrounding
   // boilerplate plus a per-element worst case.
 #ifdef NODE_JS2C_USE_STRING_LITERALS
   const char* literal_def_template = string_literal_def_template;
   // For code that contains Latin-1 characters, be conservative and assume
   // they all need escaping: one "\" and three digits.
   size_t unit = type == CodeType::kLatin1 ? 4 : 1;
   size_t def_size = 512 + code.size() * unit;
 #else
   const char* literal_def_template = array_literal_def_template;
   constexpr size_t unit =
       (is_two_byte ? 5 : 3) + 1;  // 0-65535 or 0-255 and a ","
   size_t def_size = 512 + count * unit;
 #endif

   Fragment result(def_size, 0);

   // `cur` is the write position in `result`; everything below appends at
   // `cur` and advances it.
   int cur = snprintf(
       result.data(), def_size, literal_def_template, arr_type, var.c_str());

   assert(cur != 0);

 #ifdef NODE_JS2C_USE_STRING_LITERALS
   // Opening of the literal, chosen by code type.
   std::string_view start_string_view;
   switch (type) {
     case CodeType::kAscii:
       start_string_view = ascii_string_literal_start;
       break;
     case CodeType::kLatin1:
       start_string_view = latin1_string_literal_start;
       break;
     case CodeType::kTwoByte:
       start_string_view = utf16_string_literal_start;
       break;
   }

   memcpy(
       result.data() + cur, start_string_view.data(), start_string_view.size());
   cur += start_string_view.size();

   if (type != CodeType::kLatin1) {
     // ASCII and two-byte code is copied verbatim into the raw string literal.
     memcpy(result.data() + cur, code.data(), code.size());
     cur += code.size();
   } else {
     const uint8_t* ptr = reinterpret_cast<const uint8_t*>(code.data());
     for (size_t i = 0; i < count; ++i) {
       // Avoid using snprintf on large chunks of data because it's much slower.
       // It's fine to use it on small amount of data though.
       uint8_t ch = ptr[i];
       if (ch > 127) {
         Debug("In %s, found non-ASCII Latin-1 character at %zu: %d\n",
               var.c_str(),
               i,
               ch);
       }
       // NOTE(review): GetOctalCode() is not defined in the part of the file
       // visible here - confirm it is declared before this point when
       // NODE_JS2C_USE_STRING_LITERALS is set.
       const std::string& str = GetOctalCode(ch);
       memcpy(result.data() + cur, str.c_str(), str.size());
       cur += str.size();
     }
   }

   // Closing of the literal, matching the opening chosen above.
   std::string_view string_literal_end;
   switch (type) {
     case CodeType::kAscii:
       string_literal_end = utf_string_literal_end;
       break;
     case CodeType::kLatin1:
       string_literal_end = latin1_string_literal_end;
       break;
     case CodeType::kTwoByte:
       string_literal_end = utf_string_literal_end;
       break;
   }
   memcpy(result.data() + cur,
          string_literal_end.data(),
          string_literal_end.size());
   cur += string_literal_end.size();
 #else
   memcpy(result.data() + cur,
          array_literal_start.data(),
          array_literal_start.size());
   cur += array_literal_start.size();

   // Avoid using snprintf on large chunks of data because it's much slower.
   // It's fine to use it on small amount of data though.
   if constexpr (is_two_byte) {
     // Convert the UTF-8 input to UTF-16 and emit each code unit as "<n>,".
     std::vector<uint16_t> utf16_codepoints(count);
     size_t utf16_count = simdutf::convert_utf8_to_utf16(
         code.data(),
         code.size(),
         reinterpret_cast<char16_t*>(utf16_codepoints.data()));
     assert(utf16_count != 0);
     utf16_codepoints.resize(utf16_count);
     Debug("static size %zu\n", utf16_count);
     for (size_t i = 0; i < utf16_count; ++i) {
       std::string_view str = GetCode(utf16_codepoints[i]);
       memcpy(result.data() + cur, str.data(), str.size());
       cur += str.size();
     }
   } else {
     // One-byte output: emit each byte as "<n>,".
     const uint8_t* ptr = reinterpret_cast<const uint8_t*>(code.data());
     for (size_t i = 0; i < count; ++i) {
       uint16_t ch = static_cast<uint16_t>(ptr[i]);
       if (ch > 127) {
         Debug("In %s, found non-ASCII Latin-1 character at %zu: %d\n",
               var.c_str(),
               i,
               ch);
       }
       std::string_view str = GetCode(ch);
       memcpy(result.data() + cur, str.data(), str.size());
       cur += str.size();
     }
   }

   memcpy(
       result.data() + cur, array_literal_end.data(), array_literal_end.size());
   cur += array_literal_end.size();
 #endif

   // Finish with the resource object that wraps the raw data.
   int end_size = snprintf(result.data() + cur,
                           result.size() - cur,
                           "static %s %s_resource(%s_raw, %zu, nullptr);\n",
                           resource_type,
                           var.c_str(),
                           var.c_str(),
                           count);
   cur += end_size;
   // Drop the unused tail of the over-allocated buffer.
   result.resize(cur);
   return result;
 }

 // Replaces characters known to have a sane one-byte equivalent so a file
 // may avoid the two-byte representation. Only applied to the allowlisted
 // variable "internal_deps_undici_undici".
 //
 // The (possibly modified) bytes are appended to `*simplified`. Returns true
 // only if at least one replacement was made; when false is returned the
 // caller should ignore `*simplified`.
 bool Simplify(const std::vector<char>& code,
               const std::string& var,
               std::vector<char>* simplified) {
   // Allowlist files to avoid false positives.
   // TODO(joyeecheung): this could be removed if undici updates itself
   // to replace "’" with "'" though we could still keep this skeleton in
   // place for future hot fixes that are verified by humans.
   if (var != "internal_deps_undici_undici") {
     return false;
   }

   size_t code_size = code.size();
   simplified->reserve(code_size);
   const uint8_t* ptr = reinterpret_cast<const uint8_t*>(code.data());
   size_t simplified_count = 0;
   for (size_t i = 0; i < code_size; ++i) {
     switch (ptr[i]) {
       case 226: {  // ’ [ 226, 128, 153 ] -> '
         if (i + 2 < code_size && ptr[i + 1] == 128 && ptr[i + 2] == 153) {
           simplified->push_back('\'');
           i += 2;
           simplified_count++;
           break;
         }
         [[fallthrough]];
       }
       default: {
         simplified->push_back(code[i]);
         break;
       }
     }
   }

   if (simplified_count > 0) {
     // size_t must be printed with %zu: %lu is only correct where
     // unsigned long happens to have the same width as size_t.
     Debug("Simplified %zu characters, ", simplified_count);
     Debug("old size %zu, new size %zu\n", code_size, simplified->size());
     return true;
   }
   return false;
 }

 // Generates the definition text for `var` from the UTF-8 bytes in `code`,
 // using the most compact representation that fits: ASCII, then Latin-1,
 // and two-byte (UTF-16) only as a last resort.
 Fragment GetDefinition(const std::string& var, const std::vector<char>& code) {
   Debug("GetDefinition %s, code size %zu\n", var.c_str(), code.size());

   // Pure ASCII can be emitted as-is.
   if (simdutf::validate_ascii(code.data(), code.size())) {
     Debug("ASCII-only, static size %zu\n", code.size());
     return GetDefinitionImpl<char>(code, var, CodeType::kAscii);
   }

   // Otherwise see whether every character fits in Latin-1.
   std::vector<char> as_latin1(code.size());
   const auto conversion = simdutf::convert_utf8_to_latin1_with_errors(
       code.data(), code.size(), as_latin1.data());
   if (!conversion.error) {
     as_latin1.resize(conversion.count);
     Debug("Latin-1-only, old size %zu, new size %zu\n",
           code.size(),
           as_latin1.size());
     return GetDefinitionImpl<char>(as_latin1, var, CodeType::kLatin1);
   }

   // Since V8 only supports Latin-1 and UTF16 as underlying representation
   // we have to encode all files containing two-byte characters as UTF16.
   // While some files do need two-byte characters, some just
   // unintentionally have them. Replace certain characters that are known
   // to have sane one-byte equivalent to save space.
   std::vector<char> simplified;
   const bool was_simplified = Simplify(code, var, &simplified);
   if (!was_simplified) {
     // Nothing could be replaced, so emit the original code as two-byte.
     return GetDefinitionImpl<uint16_t>(code, var, CodeType::kTwoByte);
   }

   // Something changed: classify the simplified text from scratch.
   Debug("%s is simplified, re-generate definition\n", var.c_str());
   return GetDefinition(var, simplified);
 }

 // Reads the JavaScript file `filename` and appends its generated pieces:
 //   - to `definitions`: the resource definition from GetDefinition();
 //   - to `initializers`: its entry for the BuiltinSourceMap;
 //   - to `registrations`: the statement registering its resource.
 // Returns 0 on success. If the file cannot be stat'ed or read, the error is
 // returned and nothing is appended.
 int AddModule(const std::string& filename,
               Fragments* definitions,
               Fragments* initializers,
               Fragments* registrations) {
   Debug("AddModule %s start\n", filename.c_str());

   int error = 0;
   size_t file_size = GetFileSize(filename, &error);
   if (error != 0) {
     return error;
   }
   std::vector<char> code = ReadFileSync(filename.c_str(), file_size, &error);
   if (error != 0) {
     return error;
   }
   std::string file_id = GetFileId(filename);
   std::string var = GetVariableName(file_id);

   definitions->emplace_back(GetDefinition(var, code));
   std::string source_type = builtins::GetBuiltinSourceTypeName(
       builtins::GetBuiltinSourceType(file_id, filename));

   // Initializers of the BuiltinSourceMap:
   //     {"fs", BuiltinSource{ "fs", UnionBytes(&fs_resource),
   //                           BuiltinSourceType::kFunction} },
   // The text is assembled with std::string instead of snprintf into a
   // fixed-size buffer, so an unusually long id cannot be truncated.
   std::string init_entry = "    {\"";
   init_entry += file_id;
   init_entry += "\", BuiltinSource{ \"";
   init_entry += file_id;
   init_entry += "\", UnionBytes(&";
   init_entry += var;
   init_entry += "_resource), BuiltinSourceType::";
   init_entry += source_type;
   init_entry += "} },";
   initializers->emplace_back(init_entry.begin(), init_entry.end());

   // Registrations:
   // registry->Register(&fs_resource);
   std::string registration = "  registry->Register(&";
   registration += var;
   registration += "_resource);";
   registrations->emplace_back(registration.begin(), registration.end());
   return 0;
 }

 // Returns a copy of `data` in which every non-overlapping occurrence of
 // `search` (scanning left to right) is replaced with `replacement`.
 // An empty `search` matches nothing, so the data is returned unchanged.
 std::vector<char> ReplaceAll(const std::vector<char>& data,
                              const std::string& search,
                              const std::string& replacement) {
   if (search.empty()) {
     // std::search() reports an empty needle as found at the start position,
     // which would never advance and loop forever.
     return data;
   }

   auto cur = data.begin();
   auto last = data.begin();
   std::vector<char> result;
   result.reserve(data.size());
   while ((cur = std::search(last, data.end(), search.begin(), search.end())) !=
          data.end()) {
     // Copy the text before the match, then the replacement.
     result.insert(result.end(), last, cur);
     result.insert(result.end(), replacement.begin(), replacement.end());
     last = cur + search.size();
   }
   // Copy whatever follows the last match.
   result.insert(result.end(), last, data.end());
   return result;
 }

 // Returns a copy of `input` without '#' comments: everything from a '#'
 // through the end of its line, including the newline, is dropped. If the
 // last comment has no trailing newline, the rest of the input is dropped.
 std::vector<char> StripComments(const std::vector<char>& input) {
   std::vector<char> stripped;
   stripped.reserve(input.size());

   const auto end = input.cend();
   auto cursor = input.cbegin();
   while (cursor != end) {
     // Keep everything up to the next '#' (or the end of the input).
     const auto hash = std::find(cursor, end, '#');
     stripped.insert(stripped.end(), cursor, hash);
     if (hash == end) {
       break;
     }
     // Resume just after the newline that terminates the comment.
     const auto newline = std::find(hash, end, '\n');
     cursor = (newline == end) ? end : newline + 1;
   }
   return stripped;
 }

 // This is technically unused for our config.gypi, but just porting it here to
 // mimic js2c.py.
 //
 // Joins string literals that are split over several lines: a closing quote
 // at the end of a line, the whitespace that follows, and the opening quote
 // of the next literal are all removed, so 'abc'\n  'def' becomes 'abcdef'.
 // All other text is copied unchanged.
 std::vector<char> JoinMultilineString(const std::vector<char>& input) {
   std::vector<char> result;
   result.reserve(input.size());

   const auto end = input.cend();
   auto last_inserted = input.cbegin();
   constexpr std::string_view search = "'\n";
   while (true) {
     const auto closing_quote =
         std::search(last_inserted, end, search.begin(), search.end());
     if (closing_quote == end) {
       break;
     }
     // Skip the newline and any further whitespace after the closing quote.
     auto opening_quote = closing_quote + 2;
     while (opening_quote != end &&
            std::isspace(static_cast<unsigned char>(*opening_quote))) {
       ++opening_quote;
     }
     if (opening_quote == end) {
       break;
     }
     if (*opening_quote == '\'') {
       // Another literal follows: keep everything before the closing quote
       // (including the literal's last character) and resume right after
       // the opening quote, dropping both quotes and the gap between them.
       result.insert(result.end(), last_inserted, closing_quote);
       last_inserted = opening_quote + 1;
     } else {
       // Not a continuation: copy the text, quote and whitespace included.
       result.insert(result.end(), last_inserted, opening_quote);
       last_inserted = opening_quote;
     }
   }
   result.insert(result.end(), last_inserted, end);
   return result;
 }

 // Converts gypi-style text into JSON-compatible text: comments are
 // stripped, multi-line strings joined, single quotes turned into double
 // quotes and the strings "true"/"false" turned into Booleans.
 std::vector<char> JSONify(const std::vector<char>& code) {
   // 1. Remove comments, then 2. join multiline strings.
   std::vector<char> text = JoinMultilineString(StripComments(code));

   // 3. Normalize string literals from ' into ".
   std::replace(text.begin(), text.end(), '\'', '"');

   // 4. Turn pseudo-boolean strings into Booleans.
   const std::vector<char> with_true = ReplaceAll(text, R"("true")", "true");
   return ReplaceAll(with_true, R"("false")", "false");
 }

 int AddGypi(const std::string& var,
             const std::string& filename,
             Fragments* definitions) {
   Debug("AddGypi %s start\n", filename.c_str());

   int error = 0;
   size_t file_size = GetFileSize(filename, &error);
   if (error != 0) {
     return error;
   }
   std::vector<char> code = ReadFileSync(filename.c_str(), file_size, &error);
   if (error != 0) {
     return error;
   }
   assert(var == "config");

   std::vector<char> transformed = JSONify(code);
   definitions->emplace_back(GetDefinition(var, transformed));
   return 0;
 }

 int JS2C(const FileList& js_files,
          const FileList& mjs_files,
          const std::string& config,
          const std::string& dest) {
   Fragments definitions;
   definitions.reserve(js_files.size() + mjs_files.size() + 1);
   Fragments initializers;
   initializers.reserve(js_files.size() + mjs_files.size());
   Fragments registrations;
   registrations.reserve(js_files.size() + mjs_files.size() + 1);

   for (const auto& filename : js_files) {
     int r = AddModule(filename, &definitions, &initializers, &registrations);
     if (r != 0) {
       return r;
     }
   }
   for (const auto& filename : mjs_files) {
     int r = AddModule(filename, &definitions, &initializers, &registrations);
     if (r != 0) {
       return r;
     }
   }

   assert(FilenameIsConfigGypi(config));
   // "config.gypi" -> config_raw.
   int r = AddGypi("config", config, &definitions);
   if (r != 0) {
     return r;
   }
   Fragment out = Format(definitions, initializers, registrations);
   return WriteIfChanged(out, dest);
 }

 // Prints the command-line synopsis to stderr, using `argv0` as the program
 // name, and returns 1 so callers can use it directly as the exit code.
 int PrintUsage(const char* argv0) {
   static constexpr char kUsageFormat[] =
       "Usage: %s [--verbose] [--root /path/to/project/root] "
       "path/to/output.cc path/to/directory "
       "[extra-files ...]\n";
   fprintf(stderr, kUsageFormat, argv0);
   return 1;
 }

 int Main(int argc, char* argv[]) {
   if (argc < 3) {
     return PrintUsage(argv[0]);
   }

   std::vector<std::string> args;
   args.reserve(argc);
   std::string root_dir;
   for (int i = 1; i < argc; ++i) {
     std::string arg(argv[i]);
     if (arg == "--verbose") {
       is_verbose = true;
     } else if (arg == "--root") {
       if (i == argc - 1) {
         fprintf(stderr, "--root must be followed by a path\n");
         return 1;
       }
       root_dir = argv[++i];
     } else {
       args.emplace_back(argv[i]);
     }
   }

   if (args.size() < 2) {
     return PrintUsage(argv[0]);
   }

   if (!root_dir.empty()) {
     int r = uv_chdir(root_dir.c_str());
     if (r != 0) {
       fprintf(stderr, "Cannot switch to the directory specified by --root\n");
       PrintUvError("chdir", root_dir.c_str(), r);
       return 1;
     }
   }
   std::string output = args[0];

   FileMap file_map;
   for (size_t i = 1; i < args.size(); ++i) {
     int error = 0;
     const std::string& file = args[i];
     if (IsDirectory(file, &error)) {
       if (!SearchFiles(file, &file_map, kJsSuffix) ||
           !SearchFiles(file, &file_map, kMjsSuffix)) {
         return 1;
       }
     } else if (error != 0) {
       return 1;
     } else {  // It's a file.
       std::string_view extension = HasAllowedExtensions(file);
       if (extension.size() != 0) {
         auto it = file_map.insert({std::string(extension), FileList()}).first;
         it->second.push_back(file);
       } else {
         fprintf(stderr, "Unsupported file: %s\n", file.c_str());
         return 1;
       }
     }
   }

   // Should have exactly 3 types: `.js`, `.mjs` and `.gypi`.
   assert(file_map.size() == 3);
   auto gypi_it = file_map.find(".gypi");
   // Currently config.gypi is the only `.gypi` file allowed
   if (gypi_it == file_map.end() || gypi_it->second.size() != 1 ||
       !FilenameIsConfigGypi(gypi_it->second[0])) {
     fprintf(
         stderr,
         "Arguments should contain one and only one .gypi file: config.gypi\n");
     return 1;
   }
   auto js_it = file_map.find(".js");
   auto mjs_it = file_map.find(".mjs");
   assert(js_it != file_map.end() && mjs_it != file_map.end());

   auto it = std::find(mjs_it->second.begin(),
                       mjs_it->second.end(),
                       "lib/eslint.config_partial.mjs");
   if (it != mjs_it->second.end()) {
     mjs_it->second.erase(it);
   }

   std::sort(js_it->second.begin(), js_it->second.end());
   std::sort(mjs_it->second.begin(), mjs_it->second.end());

   return JS2C(js_it->second, mjs_it->second, gypi_it->second[0], output);
 }
 }  // namespace js2c
 }  // namespace node

 // Process entry point. The raw arguments are passed through
 // node::FixupMain() to obtain a `char**` argv, which is then handed to
 // node::js2c::Main(); its return value becomes the exit code.
 // NOTE(review): NODE_MAIN, node::argv_type and node::FixupMain are not
 // defined in this file - presumably they come from executable_wrapper.h.
 NODE_MAIN(int argc, node::argv_type raw_argv[]) {
   char** argv;
   node::FixupMain(argc, raw_argv, &argv);
   return node::js2c::Main(argc, argv);
 }