blob: 0920747012f0728977bc1291a64ddb204538d6e6 [file] [log] [blame] [edit]
#include "compile_cache.h"
#include <string>
#include "debug_utils-inl.h"
#include "env-inl.h"
#include "node_file.h"
#include "node_internals.h"
#include "node_version.h"
#include "path.h"
#include "util.h"
#include "zlib.h"
#ifdef NODE_IMPLEMENTS_POSIX_CREDENTIALS
#include <unistd.h> // getuid
#endif
#ifdef _WIN32
#include <windows.h>
#endif
namespace node {
using v8::Function;
using v8::Local;
using v8::Module;
using v8::ScriptCompiler;
using v8::String;
namespace {
// Formats a 32-bit value as exactly eight lowercase hexadecimal digits,
// most significant nibble first (zero-padded, no "0x" prefix).
std::string Uint32ToHex(uint32_t crc) {
  static constexpr char kHexDigits[] = "0123456789abcdef";
  std::string hex(8, '0');
  for (size_t pos = 0; pos < hex.size(); ++pos) {
    // Position 0 holds bits 31..28, position 7 holds bits 3..0.
    const unsigned shift = 28 - 4 * static_cast<unsigned>(pos);
    hex[pos] = kHexDigits[(crc >> shift) & 0xF];
  }
  return hex;
}
// TODO(joyeecheung): use other hashes?
uint32_t GetHash(const char* data, size_t size) {
uLong crc = crc32(0L, Z_NULL, 0);
return crc32(crc, reinterpret_cast<const Bytef*>(data), size);
}
// Builds the name of the version-specific cache sub-directory:
//   $NODE_VERSION-$NODE_ARCH-$V8_CACHED_DATA_VERSION_TAG[-$UID]
//
// Where uids are available they are appended so that different users get
// different folders, which avoids cache misses caused by permission
// incompatibility. Where uids are not available we simply bear with the
// cache miss. This should be fine on Windows, as local directories there
// tend to be user-specific.
std::string GetCacheVersionTag() {
  std::string tag(NODE_VERSION);
  tag += '-';
  tag += NODE_ARCH;
  tag += '-';
  tag += Uint32ToHex(ScriptCompiler::CachedDataVersionTag());
#ifdef NODE_IMPLEMENTS_POSIX_CREDENTIALS
  tag += '-';
  tag += std::to_string(getuid());
#endif
  return tag;
}
uint32_t GetCacheKey(std::string_view filename, CachedCodeType type) {
uLong crc = crc32(0L, Z_NULL, 0);
crc = crc32(crc, reinterpret_cast<const Bytef*>(&type), sizeof(type));
crc = crc32(
crc, reinterpret_cast<const Bytef*>(filename.data()), filename.length());
return crc;
}
} // namespace
// Prints a formatted diagnostic to stderr, but only when compile cache
// debugging is enabled for this handler; otherwise it does nothing.
template <typename... Args>
inline void CompileCacheHandler::Debug(const char* format,
                                       Args&&... args) const {
  if (!is_debug_) [[likely]] {
    return;
  }
  FPrintF(stderr, format, std::forward<Args>(args)...);
}
// Returns a newly allocated CachedData holding a byte-for-byte copy of this
// entry's cache. The copy is created with the BufferOwned policy, so it owns
// its own buffer independently of this entry.
ScriptCompiler::CachedData* CompileCacheEntry::CopyCache() const {
  DCHECK_NOT_NULL(cache);
  const int length = cache->length;
  auto* duplicate = new uint8_t[length];
  memcpy(duplicate, cache->data, length);
  return new ScriptCompiler::CachedData(
      duplicate, length, ScriptCompiler::CachedData::BufferOwned);
}
// Used for identifying and verifying a file is a compile cache file.
// Persist() stores it in the kMagicNumberOffset header slot and
// ReadCacheFile() rejects any file whose slot holds a different value.
// See comments in CompileCacheHandler::Persist().
constexpr uint32_t kCacheMagicNumber = 0x8adfdbb2;
// Returns a human-readable label for this entry's code type, used in the
// debug output. Any value outside the known enumerators is a programming
// error and hits UNREACHABLE().
const char* CompileCacheEntry::type_name() const {
  const char* label = nullptr;
  switch (type) {
    case CachedCodeType::kCommonJS:
      label = "CommonJS";
      break;
    case CachedCodeType::kESM:
      label = "ESM";
      break;
    case CachedCodeType::kStrippedTypeScript:
      label = "StrippedTypeScript";
      break;
    case CachedCodeType::kTransformedTypeScript:
      label = "TransformedTypeScript";
      break;
    case CachedCodeType::kTransformedTypeScriptWithSourceMaps:
      label = "TransformedTypeScriptWithSourceMaps";
      break;
    default:
      UNREACHABLE();
  }
  return label;
}
// Tries to load the on-disk cache for |entry| from entry->cache_filename.
//
// The file is accepted only when:
//   - the header can be read in full and starts with kCacheMagicNumber,
//   - the code size and code hash in the header match the values already
//     computed for |entry|, and
//   - the payload following the header has the size and hash recorded in
//     the header.
//
// On success entry->cache is replaced with a CachedData that owns the
// payload. On any failure the function returns without touching
// entry->cache; the reason is reported via Debug().
void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
  Debug("[compile cache] reading cache from %s for %s %s...",
        entry->cache_filename,
        entry->type_name(),
        entry->source_filename);

  uv_fs_t req;
  auto defer_req_cleanup = OnScopeLeave([&req]() { uv_fs_req_cleanup(&req); });
  const char* path = entry->cache_filename.c_str();
  uv_file file = uv_fs_open(nullptr, &req, path, O_RDONLY, 0, nullptr);
  if (req.result < 0) {
    // req will be cleaned up by scope leave.
    Debug(" %s\n", uv_strerror(req.result));
    return;
  }
  uv_fs_req_cleanup(&req);

  auto defer_close = OnScopeLeave([file]() {
    uv_fs_t close_req;
    CHECK_EQ(0, uv_fs_close(nullptr, &close_req, file, nullptr));
    uv_fs_req_cleanup(&close_req);
  });

  // Read the headers.
  std::vector<uint32_t> headers(kHeaderCount);
  uv_buf_t headers_buf = uv_buf_init(reinterpret_cast<char*>(headers.data()),
                                     kHeaderCount * sizeof(uint32_t));
  const int r = uv_fs_read(nullptr, &req, file, &headers_buf, 1, 0, nullptr);
  if (r != static_cast<int>(headers_buf.len)) {
    Debug("reading header failed, bytes read %d", r);
    if (req.result < 0 && is_debug_) {
      Debug(", %s", uv_strerror(req.result));
    }
    Debug("\n");
    return;
  }
  // The request is reused for the payload reads below.
  uv_fs_req_cleanup(&req);

  Debug("[%d %d %d %d %d]...",
        headers[kMagicNumberOffset],
        headers[kCodeSizeOffset],
        headers[kCacheSizeOffset],
        headers[kCodeHashOffset],
        headers[kCacheHashOffset]);

  if (headers[kMagicNumberOffset] != kCacheMagicNumber) {
    Debug("magic number mismatch: expected %d, actual %d\n",
          kCacheMagicNumber,
          headers[kMagicNumberOffset]);
    return;
  }

  // Check the code size and hash which are already computed.
  if (headers[kCodeSizeOffset] != entry->code_size) {
    Debug("code size mismatch: expected %d, actual %d\n",
          entry->code_size,
          headers[kCodeSizeOffset]);
    return;
  }
  if (headers[kCodeHashOffset] != entry->code_hash) {
    Debug("code hash mismatch: expected %d, actual %d\n",
          entry->code_hash,
          headers[kCodeHashOffset]);
    return;
  }

  // Read the cache, grow the buffer exponentially whenever it fills up.
  // The buffer is held by a unique_ptr so that every early return below
  // releases it; ownership is handed over to V8 only on success.
  size_t offset = headers_buf.len;
  size_t capacity = 4096;  // Initial buffer capacity
  size_t total_read = 0;
  std::unique_ptr<uint8_t[]> buffer(new uint8_t[capacity]);

  while (true) {
    // If there is not enough space to read more data, do a simple
    // realloc here (we don't actually realloc because V8 requires
    // the underlying buffer to be delete[]-able).
    if (total_read == capacity) {
      size_t new_capacity = capacity * 2;
      std::unique_ptr<uint8_t[]> new_buffer(new uint8_t[new_capacity]);
      memcpy(new_buffer.get(), buffer.get(), capacity);
      buffer = std::move(new_buffer);
      capacity = new_capacity;
    }

    uv_buf_t iov =
        uv_buf_init(reinterpret_cast<char*>(buffer.get() + total_read),
                    capacity - total_read);
    int bytes_read =
        uv_fs_read(nullptr, &req, file, &iov, 1, offset + total_read, nullptr);
    if (req.result < 0) {  // Error.
      // req will be cleaned up by scope leave.
      Debug(" %s\n", uv_strerror(req.result));
      return;
    }
    uv_fs_req_cleanup(&req);
    if (bytes_read <= 0) {
      break;
    }
    total_read += bytes_read;
  }

  // Check the cache size and hash.
  if (headers[kCacheSizeOffset] != total_read) {
    Debug("cache size mismatch: expected %d, actual %d\n",
          headers[kCacheSizeOffset],
          total_read);
    return;
  }
  uint32_t cache_hash =
      GetHash(reinterpret_cast<char*>(buffer.get()), total_read);
  if (headers[kCacheHashOffset] != cache_hash) {
    Debug("cache hash mismatch: expected %d, actual %d\n",
          headers[kCacheHashOffset],
          cache_hash);
    return;
  }

  // BufferOwned makes the CachedData responsible for delete[]-ing the
  // buffer, so give up our ownership here.
  entry->cache.reset(new ScriptCompiler::CachedData(
      buffer.release(), total_read, ScriptCompiler::CachedData::BufferOwned));
  Debug(" success, size=%d\n", total_read);
}
// Returns |path| expressed relative to |base| as a UTF-8 string. The
// computation is purely lexical (std::filesystem::path::lexically_relative),
// so the file system is not consulted.
static std::string GetRelativePath(std::string_view path,
                                   std::string_view base) {
  // On Windows, the native encoding is UTF-16, so the UTF-8 input has to be
  // converted to a wide string before it is handed to std::filesystem::path.
  // On other platforms, std::filesystem::path can take UTF-8 directly.
  const auto to_fs_path = [](std::string_view utf8) {
#ifdef _WIN32
    return std::filesystem::path(ConvertUTF8ToWideString(std::string(utf8)));
#else
    return std::filesystem::path(utf8);
#endif
  };
  const auto relative_u8 =
      to_fs_path(path).lexically_relative(to_fs_path(base)).u8string();
  return std::string(relative_u8.begin(), relative_u8.end());
}
// Looks up, or creates, the in-memory cache entry for the given source.
//
// The entry is keyed by a hash of the file path and |type|. If an entry with
// the same key and the same code hash already exists it is returned as is.
// Otherwise an entry is (re)initialized for |code| and an attempt is made to
// populate it from the cache file on disk.
//
// Returns nullptr when the portable mode is enabled and the absolute
// |filename| cannot be normalized. The returned pointer is owned by
// compiler_cache_store_.
CompileCacheEntry* CompileCacheHandler::GetOrInsert(Local<String> code,
                                                    Local<String> filename,
                                                    CachedCodeType type) {
  DCHECK(!compile_cache_dir_.empty());

  Environment* env = Environment::GetCurrent(isolate_->GetCurrentContext());
  Utf8Value filename_utf8(isolate_, filename);
  std::string file_path = filename_utf8.ToString();
  // If the portable cache is enabled and it seems possible to compute the
  // relative position from an absolute path, we use the relative position
  // in the cache key.
  if (portable_ == EnableOption::PORTABLE && IsAbsoluteFilePath(file_path)) {
    // Normalize the path to ensure it is consistent.
    std::string normalized_file_path = NormalizeFileURLOrPath(env, file_path);
    if (normalized_file_path.empty()) {
      return nullptr;
    }
    std::string relative_path =
        GetRelativePath(normalized_file_path, normalized_compile_cache_dir_);
    if (!relative_path.empty()) {
      file_path = relative_path;
      Debug("[compile cache] using relative path %s from %s\n",
            file_path.c_str(),
            compile_cache_dir_.c_str());
    }
  }
  uint32_t key = GetCacheKey(file_path, type);

  // TODO(joyeecheung): don't encode this again into UTF8. If we read the
  // UTF8 content on disk as raw buffer (from the JS layer, while watching out
  // for monkey patching), we can just hash it directly.
  Utf8Value code_utf8(isolate_, code);
  uint32_t code_hash = GetHash(code_utf8.out(), code_utf8.length());

  auto loaded = compiler_cache_store_.find(key);

  // TODO(joyeecheung): let V8's in-isolate compilation cache take precedence.
  if (loaded != compiler_cache_store_.end() &&
      loaded->second->code_hash == code_hash) {
    return loaded->second.get();
  }

  CompileCacheEntry* result = nullptr;
  if (loaded == compiler_cache_store_.end()) {
    auto emplaced = compiler_cache_store_.emplace(
        key, std::make_unique<CompileCacheEntry>());
    result = emplaced.first->second.get();
  } else {
    // The code hash mismatches, so the code has changed. emplace() would not
    // replace the existing mapping, so recycle the stale entry in place
    // (which also keeps previously returned pointers valid) and make sure
    // the bookkeeping flags of the old code do not leak into the new state.
    result = loaded->second.get();
    result->refreshed = false;
    result->persisted = false;
  }

  result->code_hash = code_hash;
  result->code_size = code_utf8.length();
  result->cache_key = key;
  result->cache_filename =
      compile_cache_dir_ + kPathSeparator + Uint32ToHex(key);
  result->source_filename = filename_utf8.ToString();
  result->cache = nullptr;
  result->type = type;

  // TODO(joyeecheung): if we fail enough times, stop trying for any future
  // files.
  ReadCacheFile(result);
  return result;
}
// Overload for functions: asks V8 to create the code cache for |func|.
// MaybeSaveImpl() stores the returned pointer in the cache entry.
ScriptCompiler::CachedData* SerializeCodeCache(Local<Function> func) {
return ScriptCompiler::CreateCodeCacheForFunction(func);
}
// Overload for modules: asks V8 to create the code cache from the unbound
// module script of |mod|. MaybeSaveImpl() stores the returned pointer in the
// cache entry.
ScriptCompiler::CachedData* SerializeCodeCache(Local<Module> mod) {
return ScriptCompiler::CreateCodeCache(mod->GetUnboundModuleScript());
}
// Shared implementation of the MaybeSave() overloads for functions and
// modules.
//
// If the entry already holds a cache that V8 did not reject, the entry is
// left alone. Otherwise a fresh code cache is serialized from |func_or_mod|,
// stored in the entry, and the entry is marked as refreshed so that
// Persist() will consider writing it to disk.
template <typename T>
void CompileCacheHandler::MaybeSaveImpl(CompileCacheEntry* entry,
                                        Local<T> func_or_mod,
                                        bool rejected) {
  DCHECK_NOT_NULL(entry);
  const bool has_cache = entry->cache != nullptr;

  const char* verdict = "accepted";
  if (rejected) {
    verdict = "rejected";
  } else if (!has_cache) {
    verdict = "not initialized";
  }
  Debug("[compile cache] V8 code cache for %s %s was %s, ",
        entry->type_name(),
        entry->source_filename,
        verdict);

  const bool accepted = has_cache && !rejected;
  if (accepted) {
    Debug("keeping the in-memory entry\n");
    return;
  }
  Debug("%s the in-memory entry\n", has_cache ? "refreshing" : "initializing");

  ScriptCompiler::CachedData* data = SerializeCodeCache(func_or_mod);
  DCHECK_EQ(data->buffer_policy, ScriptCompiler::CachedData::BufferOwned);
  entry->refreshed = true;
  entry->cache.reset(data);
}
// Module overload: forwards to MaybeSaveImpl(). |rejected| tells whether V8
// rejected the cache currently held by |entry|. In debug builds, |mod| is
// checked to be a source text module.
void CompileCacheHandler::MaybeSave(CompileCacheEntry* entry,
Local<Module> mod,
bool rejected) {
DCHECK(mod->IsSourceTextModule());
MaybeSaveImpl(entry, mod, rejected);
}
// Function overload: forwards to MaybeSaveImpl(). |rejected| tells whether V8
// rejected the cache currently held by |entry|.
void CompileCacheHandler::MaybeSave(CompileCacheEntry* entry,
Local<Function> func,
bool rejected) {
MaybeSaveImpl(entry, func, rejected);
}
// Transpilation overload: stores the transpiled output |transpiled| as the
// cache content of |entry| and marks the entry as refreshed. Only valid for
// the TypeScript entry types, which is enforced with a CHECK.
void CompileCacheHandler::MaybeSave(CompileCacheEntry* entry,
                                    std::string_view transpiled) {
  CHECK(entry->type == CachedCodeType::kStrippedTypeScript ||
        entry->type == CachedCodeType::kTransformedTypeScript ||
        entry->type == CachedCodeType::kTransformedTypeScriptWithSourceMaps);
  Debug("[compile cache] saving transpilation cache for %s %s\n",
        entry->type_name(),
        entry->source_filename);

  // TODO(joyeecheung): it's weird to copy it again here. Convert the v8::String
  // directly into buffer held by v8::ScriptCompiler::CachedData here.
  const int byte_count = static_cast<int>(transpiled.size());
  auto* bytes = new uint8_t[byte_count];
  memcpy(bytes, transpiled.data(), byte_count);

  // BufferOwned: the CachedData takes over the freshly allocated copy.
  auto* cached = new ScriptCompiler::CachedData(
      bytes, byte_count, ScriptCompiler::CachedData::BufferOwned);
  entry->cache.reset(cached);
  entry->refreshed = true;
}
/**
* Persist the compile cache accumulated in memory to disk.
*
* To avoid race conditions, the cache file includes hashes of the original
* source code and the cache content. It's first written to a temporary file
* before being renamed to the target name.
*
* Layout of a cache file:
* [uint32_t] magic number
* [uint32_t] code size
* [uint32_t] code hash
* [uint32_t] cache size
* [uint32_t] cache hash
* .... compile cache content ....
*/
void CompileCacheHandler::Persist() {
DCHECK(!compile_cache_dir_.empty());
// TODO(joyeecheung): do this using a separate event loop to utilize the
// libuv thread pool and do the file system operations concurrently.
// TODO(joyeecheung): Currently flushing is triggered by either process
// shutdown or user requests. In the future we should simply start the
// writes right after module loading on a separate thread, and this method
// only blocks until all the pending writes (if any) on the other thread are
// finished. In that case, the off-thread writes should finish long
// before any attempt of flushing is made so the method would then only
// incur a negligible overhead from thread synchronization.
for (auto& pair : compiler_cache_store_) {
auto* entry = pair.second.get();
const char* type_name = entry->type_name();
if (entry->cache == nullptr) {
Debug("[compile cache] skip persisting %s %s because the cache was not "
"initialized\n",
type_name,
entry->source_filename);
continue;
}
if (entry->refreshed == false) {
Debug(
"[compile cache] skip persisting %s %s because cache was the same\n",
type_name,
entry->source_filename);
continue;
}
if (entry->persisted == true) {
Debug("[compile cache] skip persisting %s %s because cache was already "
"persisted\n",
type_name,
entry->source_filename);
continue;
}
DCHECK_EQ(entry->cache->buffer_policy,
ScriptCompiler::CachedData::BufferOwned);
char* cache_ptr =
reinterpret_cast<char*>(const_cast<uint8_t*>(entry->cache->data));
uint32_t cache_size = static_cast<uint32_t>(entry->cache->length);
uint32_t cache_hash = GetHash(cache_ptr, cache_size);
// Generating headers.
std::vector<uint32_t> headers(kHeaderCount);
headers[kMagicNumberOffset] = kCacheMagicNumber;
headers[kCodeSizeOffset] = entry->code_size;
headers[kCacheSizeOffset] = cache_size;
headers[kCodeHashOffset] = entry->code_hash;
headers[kCacheHashOffset] = cache_hash;
// Generate the temporary filename.
// The temporary file should be placed in a location like:
//
// $NODE_COMPILE_CACHE_DIR/v23.0.0-pre-arm64-5fad6d45-501/e7f8ef7f.cache.tcqrsK
//
// 1. $NODE_COMPILE_CACHE_DIR either comes from the $NODE_COMPILE_CACHE
// environment
// variable or `module.enableCompileCache()`.
// 2. v23.0.0-pre-arm64-5fad6d45-501 is the sub cache directory and
// e7f8ef7f is the hash for the cache (see
// CompileCacheHandler::Enable()),
// 3. tcqrsK is generated by uv_fs_mkstemp() as a temporary identifier.
uv_fs_t mkstemp_req;
auto cleanup_mkstemp =
OnScopeLeave([&mkstemp_req]() { uv_fs_req_cleanup(&mkstemp_req); });
std::string cache_filename_tmp = entry->cache_filename + ".XXXXXX";
Debug("[compile cache] Creating temporary file for cache of %s (%s)...",
entry->source_filename,
type_name);
int err = uv_fs_mkstemp(
nullptr, &mkstemp_req, cache_filename_tmp.c_str(), nullptr);
if (err < 0) {
Debug("failed. %s\n", uv_strerror(err));
continue;
}
Debug(" -> %s\n", mkstemp_req.path);
Debug("[compile cache] writing cache for %s %s to temporary file %s [%d "
"%d %d "
"%d %d]...",
type_name,
entry->source_filename,
mkstemp_req.path,
headers[kMagicNumberOffset],
headers[kCodeSizeOffset],
headers[kCacheSizeOffset],
headers[kCodeHashOffset],
headers[kCacheHashOffset]);
// Write to the temporary file.
uv_buf_t headers_buf = uv_buf_init(reinterpret_cast<char*>(headers.data()),
headers.size() * sizeof(uint32_t));
uv_buf_t data_buf = uv_buf_init(cache_ptr, entry->cache->length);
uv_buf_t bufs[] = {headers_buf, data_buf};
uv_fs_t write_req;
auto cleanup_write =
OnScopeLeave([&write_req]() { uv_fs_req_cleanup(&write_req); });
err = uv_fs_write(
nullptr, &write_req, mkstemp_req.result, bufs, 2, 0, nullptr);
if (err < 0) {
Debug("failed: %s\n", uv_strerror(err));
continue;
}
uv_fs_t close_req;
auto cleanup_close =
OnScopeLeave([&close_req]() { uv_fs_req_cleanup(&close_req); });
err = uv_fs_close(nullptr, &close_req, mkstemp_req.result, nullptr);
if (err < 0) {
Debug("failed: %s\n", uv_strerror(err));
continue;
}
Debug("success\n");
// Rename the temporary file to the actual cache file.
uv_fs_t rename_req;
auto cleanup_rename =
OnScopeLeave([&rename_req]() { uv_fs_req_cleanup(&rename_req); });
std::string cache_filename_final = entry->cache_filename;
Debug("[compile cache] Renaming %s to %s...",
mkstemp_req.path,
cache_filename_final);
err = uv_fs_rename(nullptr,
&rename_req,
mkstemp_req.path,
cache_filename_final.c_str(),
nullptr);
if (err < 0) {
Debug("failed: %s\n", uv_strerror(err));
continue;
}
Debug("success\n");
entry->persisted = true;
}
// Clear the map at the end in one go instead of during the iteration to
// avoid rehashing costs.
Debug("[compile cache] Clear deserialized cache.\n");
compiler_cache_store_.clear();
}
// Binds the handler to the isolate of |env| and records once, at
// construction time, whether the COMPILE_CACHE debug category is enabled.
// Debug() consults that flag before printing anything.
CompileCacheHandler::CompileCacheHandler(Environment* env)
: isolate_(env->isolate()),
is_debug_(
env->enabled_debug_list()->enabled(DebugCategory::COMPILE_CACHE)) {}
// Enables the compile cache rooted at |dir|.
//
// Directory structure:
// - Compile cache directory (|dir|, resolved to an absolute path)
//   - $NODE_VERSION-$ARCH-$CACHE_DATA_VERSION_TAG-$UID
//     - $FILENAME_AND_MODULE_TYPE_HASH: one file per cache entry, named by
//       the hex hash of filename + module type
//
// The versioned sub-directory is created if necessary. The result has status
// FAILED and an explanatory message when file system write or read
// permission for that directory is not granted, or when the directory cannot
// be created. Otherwise the status is ENABLED and cache_directory holds the
// resolved base directory (without the version tag).
CompileCacheEnableResult CompileCacheHandler::Enable(Environment* env,
                                                     const std::string& dir,
                                                     EnableOption option) {
  const std::string cache_tag = GetCacheVersionTag();
  const std::string absolute_cache_dir_base = PathResolve(env, {dir});
  const std::string cache_dir_with_tag =
      absolute_cache_dir_base + kPathSeparator + cache_tag;

  CompileCacheEnableResult result;
  // Fills in the failure fields of |result| and hands back a copy to return.
  const auto fail =
      [&result](const std::string& message) -> CompileCacheEnableResult {
    result.message = message;
    result.status = CompileCacheEnableStatus::FAILED;
    return result;
  };

  Debug("[compile cache] resolved path %s + %s -> %s\n",
        dir,
        cache_tag,
        cache_dir_with_tag);

  // Both write and read access to the cache directory are required; write
  // permission is checked first.
  struct RequiredPermission {
    permission::PermissionScope scope;
    const char* verb;
  };
  const RequiredPermission required[] = {
      {permission::PermissionScope::kFileSystemWrite, "write"},
      {permission::PermissionScope::kFileSystemRead, "read"},
  };
  for (const RequiredPermission& needed : required) {
    if (!env->permission()->is_granted(env, needed.scope, cache_dir_with_tag))
        [[unlikely]] {
      return fail("Skipping compile cache because " +
                  std::string(needed.verb) + " permission for " +
                  cache_dir_with_tag + " is not granted");
    }
  }

  fs::FSReqWrapSync req_wrap;
  const int err = fs::MKDirpSync(
      nullptr, &(req_wrap.req), cache_dir_with_tag, 0777, nullptr);
  if (is_debug_) {
    Debug("[compile cache] creating cache directory %s...%s\n",
          cache_dir_with_tag,
          err < 0 ? uv_strerror(err) : "success");
  }
  // An already existing directory is as good as a freshly created one.
  const bool directory_ready = (err == 0 || err == UV_EEXIST);
  if (!directory_ready) {
    return fail("Cannot create cache directory: " +
                std::string(uv_strerror(err)));
  }

  result.cache_directory = absolute_cache_dir_base;
  compile_cache_dir_ = cache_dir_with_tag;
  portable_ = option;
  if (option == EnableOption::PORTABLE) {
    normalized_compile_cache_dir_ =
        NormalizeFileURLOrPath(env, compile_cache_dir_);
  }
  result.status = CompileCacheEnableStatus::ENABLED;
  return result;
}
} // namespace node