Skip to content

Commit

Permalink
Merge pull request #6 from zao/pr/find-hash-seed
Browse files Browse the repository at this point in the history
Derive seed for MurmurHash64A automatically
  • Loading branch information
zao authored Jul 27, 2023
2 parents 024e5cd + 8e090eb commit 6307b38
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 34 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ if (OOZ_BUILD_BUN)
add_library(bunutil STATIC
"fnv.cpp" "fnv.h"
"murmur.cpp" "murmur.h"
"path_rep.cpp" "path_rep.h"
"util.cpp" "util.h"
"utf.cpp" "utf.h"
)
Expand All @@ -88,7 +89,7 @@ if (OOZ_BUILD_BUN)

add_subdirectory(libpoe)

add_executable(bun_extract_file "bun_extract_file.cpp" "path_rep.cpp" "path_rep.h" "ggpk_vfs.cpp" "ggpk_vfs.h")
add_executable(bun_extract_file "bun_extract_file.cpp" "ggpk_vfs.cpp" "ggpk_vfs.h")
target_link_libraries(bun_extract_file PRIVATE libbun libpoe)
if (UNIX)
target_link_libraries(bun_extract_file PRIVATE "-lstdc++fs")
Expand Down
97 changes: 64 additions & 33 deletions bun.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "fnv.h"
#include "murmur.h"
#include "path_rep.h"
#include "util.h"

#ifdef _WIN32
Expand Down Expand Up @@ -78,6 +79,32 @@ struct BunIndex {
uint64_t hash_seed_;
};

/// Hashes a path with the seeded murmur_hash_64a function.
///
/// The path is normalized before hashing: all trailing '/' characters are
/// removed and every byte is lowercased with std::tolower.
///
/// @param path Path to hash; taken by value so it can be normalized in place.
/// @param seed Seed forwarded unchanged to murmur_hash_64a.
/// @return The 64-bit hash of the normalized path.
inline uint64_t hash_path_3_21_2(std::string path, uint64_t seed) {
    // An empty path (or one consisting only of slashes) must not reach
    // back(): calling it on an empty std::string is undefined behavior.
    while (!path.empty() && path.back() == '/') {
        path.pop_back();
    }
    for (auto &ch : path) {
        // Convert via unsigned char so std::tolower never sees a negative value.
        ch = (char)std::tolower((int)(unsigned char)ch);
    }
    return murmur_hash_64a(path.c_str(), (int)path.size(), seed);
}

/// Hashes a directory path with fnv1a_64.
///
/// Trailing '/' characters are removed and the literal suffix "++" is
/// appended before hashing. The case of the path is left untouched here.
///
/// @param path Directory path; taken by value so it can be modified in place.
/// @return The 64-bit hash of the trimmed path with "++" appended.
inline uint64_t hash_directory_3_11_2(std::string path) {
    // An empty path (or one consisting only of slashes) must not reach
    // back(): calling it on an empty std::string is undefined behavior.
    while (!path.empty() && path.back() == '/') {
        path.pop_back();
    }
    path += "++";
    return fnv1a_64(path.data(), path.size());
}

/// Hashes a file path with fnv1a_64.
///
/// Every byte of the path is lowercased with std::tolower and the literal
/// suffix "++" is appended before hashing. Trailing slashes are not trimmed.
///
/// @param path File path; taken by value so it can be modified in place.
/// @return The 64-bit hash of the lowercased path with "++" appended.
inline uint64_t hash_file_3_11_2(std::string path) {
    for (std::string::size_type i = 0; i < path.size(); ++i) {
        // Convert via unsigned char so std::tolower never sees a negative value.
        const auto byte = static_cast<unsigned char>(path[i]);
        path[i] = static_cast<char>(std::tolower(static_cast<int>(byte)));
    }
    path.append("++");
    return fnv1a_64(path.data(), path.size());
}

bool BunIndex::read_file(char const *path, std::vector<uint8_t> &out) {
std::string full_path = bundle_root_ + '/' + path;
if (vfs_) {
Expand Down Expand Up @@ -269,23 +296,53 @@ BUN_DLL_PUBLIC BunIndex *BunIndexOpen(Bun *bun, Vfs *vfs, char const *root_dir)
idx->path_rep_infos_.push_back(si);
}

auto inner_mem = BunDecompressBundleAlloc(idx->bun_, r.p_, r.n_);
idx->inner_mem_ = inner_mem;
fprintf(stderr, "Decompressed inner size: %lld\n", BunMemSize(inner_mem));

idx->hash_algorithm_ = HashAlgorithm::Unknown;
idx->hash_seed_ = 0;
if (some_count) {
switch (idx->path_rep_infos_[0].hash) {
auto root_hash = idx->path_rep_infos_[0].hash;
switch (root_hash) {
case 0x07e47507b4a92e53:
idx->hash_algorithm_ = HashAlgorithm::FNV1A_3_11_2;
break;
case 0xf42a94e69cff42fe:
idx->hash_algorithm_ = HashAlgorithm::MurmurHash2A_3_21_2;
idx->hash_seed_ = 0x1337b33f;
default: {
// Recover seed from root hash via math wizardry
auto h = root_hash;
h ^= h >> 47;
h *= 0x5F7A0EA7E59B19BDULL;
h ^= h >> 47;
bool seed_validated = true;
for (int i = 1; i < idx->path_rep_infos_.size(); ++i) {
auto &ref = idx->path_rep_infos_[i];
auto results = generate_paths(inner_mem + ref.offset, ref.size);
if (!results.empty()) {
auto &r = results[0];
auto slash_pos = r.find_last_of('/');
if (slash_pos != r.npos) {
auto dir = r.substr(0, slash_pos);
auto computed_hash = hash_path_3_21_2(dir, h);
seed_validated = (computed_hash == ref.hash);
break;
}
}
}
if (seed_validated) {
idx->hash_algorithm_ = HashAlgorithm::MurmurHash2A_3_21_2;
idx->hash_seed_ = h;
fprintf(stderr, "Hash seed: 0x%016llx\n", idx->hash_seed_);
}
break;
}
}
}

auto inner_mem = BunDecompressBundleAlloc(idx->bun_, r.p_, r.n_);
idx->inner_mem_ = inner_mem;
fprintf(stderr, "Decompressed inner size: %lld\n", BunMemSize(inner_mem));
if (idx->hash_algorithm_ == HashAlgorithm::Unknown) {
fprintf(stderr, "Could not detect path hash algorithm/seed\n");
return nullptr;
}

return idx.release();
}
Expand All @@ -297,32 +354,6 @@ BUN_DLL_PUBLIC void BunIndexClose(BunIndex *idx) {
}
}

/// Hashes a path with the seeded murmur_hash_64a function.
///
/// The path is normalized before hashing: all trailing '/' characters are
/// removed and every byte is lowercased with std::tolower.
///
/// @param path Path to hash; taken by value so it can be normalized in place.
/// @param seed Seed forwarded unchanged to murmur_hash_64a.
/// @return The 64-bit hash of the normalized path.
uint64_t hash_path_3_21_2(std::string path, uint64_t seed) {
    // An empty path (or one consisting only of slashes) must not reach
    // back(): calling it on an empty std::string is undefined behavior.
    while (!path.empty() && path.back() == '/') {
        path.pop_back();
    }
    for (auto &ch : path) {
        // Convert via unsigned char so std::tolower never sees a negative value.
        ch = (char)std::tolower((int)(unsigned char)ch);
    }
    return murmur_hash_64a(path.c_str(), (int)path.size(), seed);
}

/// Hashes a directory path with fnv1a_64.
///
/// Trailing '/' characters are removed and the literal suffix "++" is
/// appended before hashing. The case of the path is left untouched here.
///
/// @param path Directory path; taken by value so it can be modified in place.
/// @return The 64-bit hash of the trimmed path with "++" appended.
uint64_t hash_directory_3_11_2(std::string path) {
    // An empty path (or one consisting only of slashes) must not reach
    // back(): calling it on an empty std::string is undefined behavior.
    while (!path.empty() && path.back() == '/') {
        path.pop_back();
    }
    path += "++";
    return fnv1a_64(path.data(), path.size());
}

/// Hashes a file path with fnv1a_64.
///
/// Every byte of the path is lowercased with std::tolower and the literal
/// suffix "++" is appended before hashing. Trailing slashes are not trimmed.
///
/// @param path File path; taken by value so it can be modified in place.
/// @return The 64-bit hash of the lowercased path with "++" appended.
uint64_t hash_file_3_11_2(std::string path) {
    for (std::string::size_type i = 0; i < path.size(); ++i) {
        // Convert via unsigned char so std::tolower never sees a negative value.
        const auto byte = static_cast<unsigned char>(path[i]);
        path[i] = static_cast<char>(std::tolower(static_cast<int>(byte)));
    }
    path.append("++");
    return fnv1a_64(path.data(), path.size());
}

BUN_DLL_PUBLIC int32_t BunIndexLookupFileByPath(BunIndex *idx, char const *path) {
if (!idx) {
return -1;
Expand Down

0 comments on commit 6307b38

Please sign in to comment.