Skip to content

Commit

Permalink
Use HashMap with faster hashing algorithm for packfile writer
Browse files Browse the repository at this point in the history
Summary: We use a HashMap to store the set of objects written to the packfile to ensure we do not add duplicate objects to the pack. These collections can grow quite large, in which case the performance of the hashing algorithm plays a role in the overall runtime. Since the input to these collections is known to be safe, we do not need a cryptographic hash and can instead use a much faster non-cryptographic hashing algorithm (FxHash, from the `rustc-hash` crate) used within the Rust compiler.

Differential Revision: D54638708

fbshipit-source-id: 5053cf94598be3670ca2339878eeacb2a60eddd0
  • Loading branch information
RajivTS authored and facebook-github-bot committed Mar 8, 2024
1 parent 555e33f commit 5405dd8
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 2 deletions.
1 change: 1 addition & 0 deletions eden/mononoke/git/packfile/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ mononoke_types = { version = "0.1.0", path = "../../mononoke_types" }
packfile_thrift = { version = "0.1.0", path = "if" }
pin-project = "0.4.30"
quickcheck = "1.0"
rustc-hash = "1.1.0"
sha1 = "0.10.5"
thiserror = "1.0.49"
tokio = { version = "1.29.1", features = ["full", "test-util", "tracing"] }
Expand Down
1 change: 1 addition & 0 deletions eden/mononoke/git/packfile/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ rust_library(
"fbsource//third-party/rust:gix-pack",
"fbsource//third-party/rust:pin-project",
"fbsource//third-party/rust:quickcheck",
"fbsource//third-party/rust:rustc-hash",
"fbsource//third-party/rust:sha1",
"fbsource//third-party/rust:thiserror",
"fbsource//third-party/rust:tokio",
Expand Down
10 changes: 8 additions & 2 deletions eden/mononoke/git/packfile/src/pack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/

use std::collections::HashMap;
use std::hash::BuildHasherDefault;
use std::io::Write;

use anyhow::Context;
Expand All @@ -16,6 +17,8 @@ use gix_hash::ObjectId;
use gix_pack::data::header;
use gix_pack::data::output::Entry;
use gix_pack::data::Version;
use rustc_hash::FxHashMap;
use rustc_hash::FxHasher;
use thiserror::Error;
use tokio::io::AsyncWrite;
use tokio::io::AsyncWriteExt;
Expand Down Expand Up @@ -66,7 +69,7 @@ where
/// The form of deltas that should be allowed in the packfile
delta_form: DeltaForm,
/// Mapping from Object Id to index in `object_offset_with_validity`
object_id_with_index: HashMap<ObjectId, usize>,
object_id_with_index: FxHashMap<ObjectId, usize>,
}

impl<T: AsyncWrite + Unpin> PackfileWriter<T> {
Expand All @@ -82,7 +85,10 @@ impl<T: AsyncWrite + Unpin> PackfileWriter<T> {
// Git uses V2 right now so we do the same
header_info: Some((Version::V2, count)),
object_offset_with_validity: Vec::with_capacity(count as usize),
object_id_with_index: HashMap::with_capacity(count as usize),
object_id_with_index: HashMap::with_capacity_and_hasher(
count as usize,
BuildHasherDefault::<FxHasher>::default(),
),
delta_form,
}
}
Expand Down

0 comments on commit 5405dd8

Please sign in to comment.