From 7f7a744ebb9875e77f862a12e2fb9ca48ac27e7e Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 16 Aug 2023 11:44:44 +0900 Subject: [PATCH] Use BLAKE3 instead of SHA256 for cryptographic hash computation Closes https://github.com/rui314/mold/issues/1088 --- CMakeLists.txt | 20 +++++++---- common/sha.h | 81 -------------------------------------------- elf/icf.cc | 30 +++++++++------- elf/mold.h | 2 -- elf/output-chunks.cc | 29 +++++++++------- 5 files changed, 47 insertions(+), 115 deletions(-) delete mode 100644 common/sha.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e4e84222ab..d569c9ca1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,8 +121,19 @@ else() target_link_libraries(mold PRIVATE zlibstatic) endif() -# Find zstd compression library. Just like zlib, if libzstd.so is not -# found, we compile a bundled one and statically-link it to mold. +# Find BLAKE3 cryptographic hash library. Just like zlib, if libblake3.so +# is not found, we compile a bundled one and statically-link it to mold. +find_package(BLAKE3 QUIET) +if(BLAKE3_FOUND) + target_link_libraries(mold PRIVATE BLAKE3::blake3) +else() + add_subdirectory(third-party/blake3/c EXCLUDE_FROM_ALL) + target_link_libraries(mold PRIVATE blake3) + target_include_directories(mold PUBLIC third-party/blake3/c) +endif() + +# Find zstd compression library. If libzstd.so is not found, we compile a +# bundled one and statically-link it to mold. include(CheckIncludeFile) check_include_file(zstd.h HAVE_ZSTD_H) @@ -237,11 +248,6 @@ if(NOT APPLE AND NOT WIN32) target_sources(mold-wrapper PRIVATE elf/mold-wrapper.c) endif() -if(NOT APPLE AND NOT WIN32 AND NOT MOLD_MOSTLY_STATIC) - find_package(OpenSSL REQUIRED COMPONENTS Crypto) - target_link_libraries(mold PRIVATE OpenSSL::Crypto) -endif() - # If atomics doesn't work by default, add -latomic. # We need the flag on riscv, armv6 and m68k. 
include(CheckCXXSourceCompiles) diff --git a/common/sha.h b/common/sha.h deleted file mode 100644 index c5c72c5bc4..0000000000 --- a/common/sha.h +++ /dev/null @@ -1,81 +0,0 @@ -#pragma once - -#include - -typedef uint8_t u8; -static constexpr int64_t SHA256_SIZE = 32; - -#ifdef _WIN32 -// On Windows, we use Microsoft CNG. - -#include -#include -#include - -inline static BCRYPT_ALG_HANDLE get_sha256_handle() { - static std::once_flag once; - static BCRYPT_ALG_HANDLE alg; - - std::call_once(once, [&] { - BCryptOpenAlgorithmProvider(&alg, BCRYPT_SHA256_ALGORITHM, nullptr, 0); - }); - return alg; -} - -inline void sha256_hash(u8 *in, size_t len, u8 *out) { - BCryptHash(get_sha256_handle(), nullptr, 0, in, len, out, SHA256_SIZE); -} - -class SHA256Hash { -public: - SHA256Hash() { - BCryptCreateHash(get_sha256_handle(), &handle, nullptr, 0, nullptr, 0, 0); - } - - void update(u8 *data, size_t len) { - BCryptHashData(handle, data, len, 0); - } - - void finish(u8 *out) { - BCryptFinishHash(handle, out, SHA256_SIZE, 0); - } - -private: - BCRYPT_HASH_HANDLE handle; -}; - -#else -// On Unix, we use OpenSSL or the Apple's OpenSSL-compatible API. - -#ifdef __APPLE__ -# define COMMON_DIGEST_FOR_OPENSSL -# include -# define SHA256(data, len, md) CC_SHA256(data, len, md) -#else -# define OPENSSL_SUPPRESS_DEPRECATED 1 -# include -#endif - -inline void sha256_hash(u8 *in, size_t len, u8 *out) { - SHA256(in, len, out); -} - -class SHA256Hash { -public: - SHA256Hash() { - SHA256_Init(&ctx); - } - - void update(u8 *data, size_t len) { - SHA256_Update(&ctx, data, len); - } - - void finish(u8 *out) { - SHA256_Final(out, &ctx); - } - -private: - SHA256_CTX ctx; -}; - -#endif diff --git a/elf/icf.cc b/elf/icf.cc index 75e1f08831..72773f42cb 100644 --- a/elf/icf.cc +++ b/elf/icf.cc @@ -65,7 +65,7 @@ // conditions. 
#include "mold.h" -#include "../common/sha.h" +#include "blake3.h" #include #include @@ -132,9 +132,11 @@ static bool is_eligible(Context &ctx, InputSection &isec) { !is_init && !is_fini && !is_enumerable && !is_addr_taken; } -static Digest digest_final(SHA256Hash &sha) { - u8 buf[SHA256_SIZE]; - sha.finish(buf); +static Digest digest_final(blake3_hasher *hasher) { + assert(HASH_SIZE <= BLAKE3_OUT_LEN); + + u8 buf[BLAKE3_OUT_LEN]; + blake3_hasher_finalize(hasher, buf, BLAKE3_OUT_LEN); Digest digest; memcpy(digest.data(), buf, HASH_SIZE); @@ -234,15 +236,16 @@ static void merge_leaf_nodes(Context &ctx) { template static Digest compute_digest(Context &ctx, InputSection &isec) { - SHA256Hash sha; + blake3_hasher hasher; + blake3_hasher_init(&hasher); auto hash = [&](auto val) { - sha.update((u8 *)&val, sizeof(val)); + blake3_hasher_update(&hasher, (u8 *)&val, sizeof(val)); }; auto hash_string = [&](std::string_view str) { hash(str.size()); - sha.update((u8 *)str.data(), str.size()); + blake3_hasher_update(&hasher, (u8 *)str.data(), str.size()); }; auto hash_symbol = [&](Symbol &sym) { @@ -298,7 +301,7 @@ static Digest compute_digest(Context &ctx, InputSection &isec) { hash_symbol(*isec.file.symbols[rel.r_sym]); } - return digest_final(sha); + return digest_final(&hasher); } template @@ -411,16 +414,17 @@ static i64 propagate(std::span> digests, if (converged.get(i)) return; - SHA256Hash sha; - sha.update(digests[2][i].data(), HASH_SIZE); + blake3_hasher hasher; + blake3_hasher_init(&hasher); + blake3_hasher_update(&hasher, digests[2][i].data(), HASH_SIZE); i64 begin = edge_indices[i]; i64 end = (i + 1 == num_digests) ? 
edges.size() : edge_indices[i + 1]; for (i64 j : edges.subspan(begin, end - begin)) - sha.update(digests[slot][j].data(), HASH_SIZE); + blake3_hasher_update(&hasher, digests[slot][j].data(), HASH_SIZE); - digests[!slot][i] = digest_final(sha); + digests[!slot][i] = digest_final(&hasher); if (digests[slot][i] == digests[!slot][i]) { // This node has converged. Skip further iterations as it will @@ -563,7 +567,7 @@ void icf_sections(Context &ctx) { } } - // Group sections by SHA digest. + // Group sections by BLAKE3 digest. { Timer t(ctx, "group"); diff --git a/elf/mold.h b/elf/mold.h index 3b042bd339..f494a2a884 100644 --- a/elf/mold.h +++ b/elf/mold.h @@ -36,8 +36,6 @@ namespace mold::elf { -static constexpr i32 SHA256_SIZE = 32; - template class InputFile; template class InputSection; template class MergedSection; diff --git a/elf/output-chunks.cc b/elf/output-chunks.cc index 160f237bdc..e37d4bf991 100644 --- a/elf/output-chunks.cc +++ b/elf/output-chunks.cc @@ -1,5 +1,5 @@ #include "mold.h" -#include "../common/sha.h" +#include "blake3.h" #include #include @@ -2519,19 +2519,28 @@ void BuildIdSection::copy_buf(Context &ctx) { memcpy(base + 3, "GNU", 4); // Name string } +// BLAKE3 is a cryptographic hash function just like SHA256. +// We use it instead of SHA256 because it's faster. 
+static void blake3_hash(u8 *buf, i64 size, u8 *out) { + blake3_hasher hasher; + blake3_hasher_init(&hasher); + blake3_hasher_update(&hasher, buf, size); + blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN); +} + template -static void compute_sha256(Context &ctx, i64 offset) { +static void compute_blake3(Context &ctx, i64 offset) { u8 *buf = ctx.buf; i64 filesize = ctx.output_file->filesize; i64 shard_size = 4096 * 1024; i64 num_shards = align_to(filesize, shard_size) / shard_size; - std::vector shards(num_shards * SHA256_SIZE); + std::vector shards(num_shards * BLAKE3_OUT_LEN); tbb::parallel_for((i64)0, num_shards, [&](i64 i) { u8 *begin = buf + shard_size * i; u8 *end = (i == num_shards - 1) ? buf + filesize : begin + shard_size; - sha256_hash(begin, end - begin, shards.data() + i * SHA256_SIZE); + blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN); #ifndef _WIN32 // We call munmap early for each chunk so that the last munmap @@ -2543,10 +2552,10 @@ static void compute_sha256(Context &ctx, i64 offset) { #endif }); - assert(ctx.arg.build_id.size() <= SHA256_SIZE); + assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN); - u8 digest[SHA256_SIZE]; - sha256_hash(shards.data(), shards.size(), digest); + u8 digest[BLAKE3_OUT_LEN]; + blake3_hash(shards.data(), shards.size(), digest); memcpy(buf + offset, digest, ctx.arg.build_id.size()); #ifndef _WIN32 @@ -2567,11 +2576,7 @@ void BuildIdSection::write_buildid(Context &ctx) { ctx.arg.build_id.value); return; case BuildId::HASH: - // Modern x86 processors have purpose-built instructions to accelerate - // SHA256 computation, and SHA256 outperforms MD5 on such computers. - // So, we always compute SHA256 and truncate it if smaller digest was - // requested. - compute_sha256(ctx, this->shdr.sh_offset + HEADER_SIZE); + compute_blake3(ctx, this->shdr.sh_offset + HEADER_SIZE); return; case BuildId::UUID: { std::array uuid = get_uuid_v4();