From f9b19d7c44837a394d9b39425ba670762dab514f Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Wed, 4 Sep 2024 20:09:56 +0200 Subject: [PATCH] module: write compile cache to temporary file and then rename it This works better in terms of avoiding race conditions. PR-URL: https://github.com/nodejs/node/pull/54971 Fixes: https://github.com/nodejs/node/issues/54770 Fixes: https://github.com/nodejs/node/issues/54465 Reviewed-By: Yagiz Nizipli Reviewed-By: Matteo Collina --- src/compile_cache.cc | 115 ++++++++++++++++++++++++++++++++++++------- src/compile_cache.h | 2 + src/env.cc | 9 +++- src/env.h | 1 + 4 files changed, 107 insertions(+), 20 deletions(-) diff --git a/src/compile_cache.cc b/src/compile_cache.cc index 0843fb5dca7b46..e5262a19d1549c 100644 --- a/src/compile_cache.cc +++ b/src/compile_cache.cc @@ -219,6 +219,8 @@ CompileCacheEntry* CompileCacheHandler::GetOrInsert( return loaded->second.get(); } + // If the code hash mismatches, the code has changed, discard the stale entry + // and create a new one. auto emplaced = compiler_cache_store_.emplace(key, std::make_unique()); auto* result = emplaced.first->second.get(); @@ -287,23 +289,26 @@ void CompileCacheHandler::MaybeSave(CompileCacheEntry* entry, MaybeSaveImpl(entry, func, rejected); } -// Layout of a cache file: -// [uint32_t] magic number -// [uint32_t] code size -// [uint32_t] code hash -// [uint32_t] cache size -// [uint32_t] cache hash -// .... compile cache content .... +/** + * Persist the compile cache accumulated in memory to disk. + * + * To avoid race conditions, the cache file includes hashes of the original + * source code and the cache content. It's first written to a temporary file + * before being renamed to the target name. + * + * Layout of a cache file: + * [uint32_t] magic number + * [uint32_t] code size + * [uint32_t] code hash + * [uint32_t] cache size + * [uint32_t] cache hash + * .... compile cache content .... + */ void CompileCacheHandler::Persist() { DCHECK(!compile_cache_dir_.empty()); - // NOTE(joyeecheung): in most circumstances the code caching reading - // writing logic is lenient enough that it's fine even if someone - // overwrites the cache (that leads to either size or hash mismatch - // in subsequent loads and the overwritten cache will be ignored). - // Also in most use cases users should not change the files on disk - // too rapidly. Therefore locking is not currently implemented to - // avoid the cost. + // TODO(joyeecheung): do this using a separate event loop to utilize the + // libuv thread pool and do the file system operations concurrently. for (auto& pair : compiler_cache_store_) { auto* entry = pair.second.get(); if (entry->cache == nullptr) { @@ -316,6 +321,11 @@ void CompileCacheHandler::Persist() { entry->source_filename); continue; } + if (entry->persisted == true) { + Debug("[compile cache] skip %s because cache was already persisted\n", + entry->source_filename); + continue; + } DCHECK_EQ(entry->cache->buffer_policy, v8::ScriptCompiler::CachedData::BufferOwned); @@ -332,27 +342,94 @@ void CompileCacheHandler::Persist() { headers[kCodeHashOffset] = entry->code_hash; headers[kCacheHashOffset] = cache_hash; - Debug("[compile cache] writing cache for %s in %s [%d %d %d %d %d]...", + // Generate the temporary filename. + // The temporary file should be placed in a location like: + // + // $NODE_COMPILE_CACHE_DIR/v23.0.0-pre-arm64-5fad6d45-501/e7f8ef7f.cache.tcqrsK + // + // 1. $NODE_COMPILE_CACHE_DIR either comes from the $NODE_COMPILE_CACHE + // environment + // variable or `module.enableCompileCache()`. + // 2. v23.0.0-pre-arm64-5fad6d45-501 is the sub cache directory and + // e7f8ef7f is the hash for the cache (see + // CompileCacheHandler::Enable()), + // 3. tcqrsK is generated by uv_fs_mkstemp() as a temporary indentifier. + uv_fs_t mkstemp_req; + auto cleanup_mkstemp = + OnScopeLeave([&mkstemp_req]() { uv_fs_req_cleanup(&mkstemp_req); }); + std::string cache_filename_tmp = entry->cache_filename + ".XXXXXX"; + Debug("[compile cache] Creating temporary file for cache of %s...", + entry->source_filename); + int err = uv_fs_mkstemp( + nullptr, &mkstemp_req, cache_filename_tmp.c_str(), nullptr); + if (err < 0) { + Debug("failed. %s\n", uv_strerror(err)); + continue; + } + Debug(" -> %s\n", mkstemp_req.path); + Debug("[compile cache] writing cache for %s to temporary file %s [%d %d %d " + "%d %d]...", entry->source_filename, - entry->cache_filename, + mkstemp_req.path, headers[kMagicNumberOffset], headers[kCodeSizeOffset], headers[kCacheSizeOffset], headers[kCodeHashOffset], headers[kCacheHashOffset]); + // Write to the temporary file. uv_buf_t headers_buf = uv_buf_init(reinterpret_cast(headers.data()), headers.size() * sizeof(uint32_t)); uv_buf_t data_buf = uv_buf_init(cache_ptr, entry->cache->length); uv_buf_t bufs[] = {headers_buf, data_buf}; - int err = WriteFileSync(entry->cache_filename.c_str(), bufs, 2); + uv_fs_t write_req; + auto cleanup_write = + OnScopeLeave([&write_req]() { uv_fs_req_cleanup(&write_req); }); + err = uv_fs_write( + nullptr, &write_req, mkstemp_req.result, bufs, 2, 0, nullptr); + if (err < 0) { + Debug("failed: %s\n", uv_strerror(err)); + continue; + } + + uv_fs_t close_req; + auto cleanup_close = + OnScopeLeave([&close_req]() { uv_fs_req_cleanup(&close_req); }); + err = uv_fs_close(nullptr, &close_req, mkstemp_req.result, nullptr); + + if (err < 0) { + Debug("failed: %s\n", uv_strerror(err)); + continue; + } + + Debug("success\n"); + + // Rename the temporary file to the actual cache file. + uv_fs_t rename_req; + auto cleanup_rename = + OnScopeLeave([&rename_req]() { uv_fs_req_cleanup(&rename_req); }); + std::string cache_filename_final = entry->cache_filename; + Debug("[compile cache] Renaming %s to %s...", + mkstemp_req.path, + cache_filename_final); + err = uv_fs_rename(nullptr, + &rename_req, + mkstemp_req.path, + cache_filename_final.c_str(), + nullptr); if (err < 0) { Debug("failed: %s\n", uv_strerror(err)); - } else { - Debug("success\n"); + continue; } + Debug("success\n"); + entry->persisted = true; } + + // Clear the map at the end in one go instead of during the iteration to + // avoid rehashing costs. + Debug("[compile cache] Clear deserialized cache.\n"); + compiler_cache_store_.clear(); } CompileCacheHandler::CompileCacheHandler(Environment* env) diff --git a/src/compile_cache.h b/src/compile_cache.h index 975a3f2080f7a7..fe8c0b93cb3ef3 100644 --- a/src/compile_cache.h +++ b/src/compile_cache.h @@ -30,6 +30,8 @@ struct CompileCacheEntry { std::string source_filename; CachedCodeType type; bool refreshed = false; + bool persisted = false; + // Copy the cache into a new store for V8 to consume. Caller takes // ownership. v8::ScriptCompiler::CachedData* CopyCache() const; diff --git a/src/env.cc b/src/env.cc index 665b064091d4cc..7f83a942160973 100644 --- a/src/env.cc +++ b/src/env.cc @@ -1154,7 +1154,7 @@ CompileCacheEnableResult Environment::EnableCompileCache( compile_cache_handler_ = std::move(handler); AtExit( [](void* env) { - static_cast(env)->compile_cache_handler()->Persist(); + static_cast(env)->FlushCompileCache(); }, this); } @@ -1171,6 +1171,13 @@ CompileCacheEnableResult Environment::EnableCompileCache( return result; } +void Environment::FlushCompileCache() { + if (!compile_cache_handler_ || compile_cache_handler_->cache_dir().empty()) { + return; + } + compile_cache_handler_->Persist(); +} + void Environment::ExitEnv(StopFlags::Flags flags) { // Should not access non-thread-safe methods here. set_stopping(true); diff --git a/src/env.h b/src/env.h index fc8dbd61525585..b535e4a7894913 100644 --- a/src/env.h +++ b/src/env.h @@ -1045,6 +1045,7 @@ class Environment final : public MemoryRetainer { // Enable built-in compile cache if it has not yet been enabled. // The cache will be persisted to disk on exit. CompileCacheEnableResult EnableCompileCache(const std::string& cache_dir); + void FlushCompileCache(); void RunAndClearNativeImmediates(bool only_refed = false); void RunAndClearInterrupts();