Skip to content

Commit

Permalink
unbatch batches with size < N
Browse files Browse the repository at this point in the history
Summary:
Half of files and trees (in terms of bytes) requests are batched https://fburl.com/scuba/mononoke_test_perf/fha12kbz and those are not consistently routed.

63% of batched requests are batches of size < 5: https://fburl.com/scuba/mononoke_test_perf/b5qrq6zb

most of it comes from edenfs: https://fburl.com/scuba/mononoke_test_perf/91hajq2o

Because of consistent hashing, we might be better off just sending 3 parallel consistently-routed requests instead of a batch of size 3 routed to random hosts, evicting things from cache all over the place.

This should help with S402137: EdenFS performance regression causing performance degradation for multiple tools

This will make more trees and files consistently routed, which should improve caching server-side. That should relieve the load on servers and speed up requests. Given the fact we're unbatching small batches, there shouldn't be a big impact on perf client-side as those requests can be made in parallel. I did run some manual tests (not very extensive though), and the results with this change (multiple requests) and without it (a batch) were similar.

Reviewed By: liubov-dmitrieva

Differential Revision: D54680781

fbshipit-source-id: a35c31e8c12ccca7936190b9c228ce580f7b74ff
  • Loading branch information
mzr authored and facebook-github-bot committed Mar 18, 2024
1 parent ca1d8a4 commit c5d0b0e
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 7 deletions.
6 changes: 6 additions & 0 deletions eden/scm/lib/edenapi/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ pub struct HttpClientBuilder {
max_location_to_hash: Option<usize>,
max_commit_mutations: Option<usize>,
max_commit_translate_id: Option<usize>,
min_batch_size: Option<usize>,
timeout: Option<Duration>,
debug: bool,
http_version: Option<HttpVersion>,
Expand Down Expand Up @@ -171,6 +172,7 @@ impl HttpClientBuilder {
get_config(config, "edenapi", "try-route-consistently")?.unwrap_or_default();
let max_history = get_config(config, "edenapi", "maxhistory")?;
let max_location_to_hash = get_config(config, "edenapi", "maxlocationtohash")?;
let min_batch_size = get_config(config, "edenapi", "min-batch-size")?;
let max_commit_mutations = get_config(config, "edenapi", "maxcommitmutations")?;
let max_commit_translate_id = get_config(config, "edenapi", "maxcommittranslateid")?;
let timeout = get_config(config, "edenapi", "timeout")?.map(Duration::from_secs);
Expand Down Expand Up @@ -218,6 +220,7 @@ impl HttpClientBuilder {
max_location_to_hash,
max_commit_mutations,
max_commit_translate_id,
min_batch_size,
timeout,
debug,
http_version,
Expand Down Expand Up @@ -368,6 +371,7 @@ pub(crate) struct Config {
pub(crate) max_location_to_hash: Option<usize>,
pub(crate) max_commit_mutations: Option<usize>,
pub(crate) max_commit_translate_id: Option<usize>,
pub(crate) min_batch_size: Option<usize>,
pub(crate) timeout: Option<Duration>,
#[allow(dead_code)]
pub(crate) debug: bool,
Expand All @@ -394,6 +398,7 @@ impl TryFrom<HttpClientBuilder> for Config {
max_location_to_hash,
max_commit_mutations,
max_commit_translate_id,
min_batch_size,
timeout,
debug,
http_version,
Expand Down Expand Up @@ -431,6 +436,7 @@ impl TryFrom<HttpClientBuilder> for Config {
max_location_to_hash,
max_commit_mutations,
max_commit_translate_id,
min_batch_size,
timeout,
debug,
http_version,
Expand Down
75 changes: 68 additions & 7 deletions eden/scm/lib/edenapi/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ impl Client {
url: &Url,
keys: K,
batch_size: Option<usize>,
min_batch_size: Option<usize>,
mut make_req: F,
mut mutate_url: G,
) -> Result<Vec<Request>, EdenApiError>
Expand All @@ -272,7 +273,7 @@ impl Client {
G: FnMut(&Url, &Vec<T>) -> Url,
R: ToWire,
{
split_into_batches(keys, batch_size)
split_into_batches(keys, batch_size, min_batch_size)
.into_iter()
.map(|keys| {
let url = mutate_url(url, &keys);
Expand Down Expand Up @@ -457,11 +458,13 @@ impl Client {
let url = self.build_url(paths::TREES)?;

let try_route_consistently = self.config().try_route_consistently;
let min_batch_size: Option<usize> = self.config().min_batch_size;

let requests = self.prepare_requests(
&url,
keys,
self.config().max_trees,
min_batch_size,
|keys| {
let req = TreeRequest {
keys,
Expand Down Expand Up @@ -499,11 +502,13 @@ impl Client {

let url = self.build_url(paths::FILES2)?;
let try_route_consistently = self.config().try_route_consistently;
let min_batch_size: Option<usize> = self.config().min_batch_size;

let requests = self.prepare_requests(
&url,
reqs,
self.config().max_files,
min_batch_size,
|reqs| {
let req = FileRequest { reqs, keys: vec![] };
self.log_request(&req, "files");
Expand Down Expand Up @@ -739,11 +744,13 @@ impl Client {
let url = self.build_url(paths::HISTORY)?;

let try_route_consistently = self.config().try_route_consistently;
let min_batch_size: Option<usize> = self.config().min_batch_size;

let requests = self.prepare_requests(
&url,
keys,
self.config().max_history,
min_batch_size,
|keys| {
let req = HistoryRequest { keys, length };
self.log_request(&req, "history");
Expand Down Expand Up @@ -781,6 +788,7 @@ impl Client {
&url,
files,
Some(MAX_CONCURRENT_BLAMES_PER_REQUEST),
None,
|files| {
let req = BlameRequest { files };
self.log_request(&req, "blame");
Expand Down Expand Up @@ -809,6 +817,7 @@ impl Client {
&url,
commits,
self.config().max_commit_translate_id,
None,
|commits| {
let req = CommitTranslateIdRequest {
commits,
Expand Down Expand Up @@ -934,6 +943,7 @@ impl Client {
&url,
items,
Some(MAX_CONCURRENT_UPLOAD_FILENODES_PER_REQUEST),
None,
|ids| Batch::<_> {
batch: ids
.into_iter()
Expand All @@ -960,6 +970,7 @@ impl Client {
&url,
items,
Some(MAX_CONCURRENT_UPLOAD_TREES_PER_REQUEST),
None,
|ids| Batch::<_> {
batch: ids
.into_iter()
Expand Down Expand Up @@ -1082,6 +1093,7 @@ impl EdenApi for Client {
&url,
prefixes,
Some(MAX_CONCURRENT_HASH_LOOKUPS_PER_REQUEST),
None,
|prefixes| Batch::<_> { batch: prefixes },
|url, _keys| url.clone(),
)?;
Expand Down Expand Up @@ -1197,6 +1209,7 @@ impl EdenApi for Client {
&url,
requests,
self.config().max_location_to_hash,
None,
|requests| {
let batch = CommitLocationToHashRequestBatch { requests };
self.log_request(&batch, "commit_location_to_hash");
Expand Down Expand Up @@ -1229,6 +1242,7 @@ impl EdenApi for Client {
&url,
hgids,
self.config().max_location_to_hash,
None,
|hgids| {
let batch = CommitHashToLocationRequestBatch {
master_heads: master_heads.clone(),
Expand Down Expand Up @@ -1356,6 +1370,7 @@ impl EdenApi for Client {
&url,
items,
Some(MAX_CONCURRENT_LOOKUPS_PER_REQUEST),
None,
|ids| Batch::<LookupRequest> {
batch: ids
.into_iter()
Expand Down Expand Up @@ -1530,6 +1545,7 @@ impl EdenApi for Client {
&url,
commits,
self.config().max_commit_mutations,
None,
|commits| {
let req = CommitMutationsRequest { commits };
self.log_request(&req, "commit_mutations");
Expand Down Expand Up @@ -1571,14 +1587,31 @@ impl EdenApi for Client {
/// Split `keys` into batches of at most `batch_size` items.
///
/// If `batch_size` is `None`, all keys go into a single batch. If
/// `min_batch_size` is `Some(min)`, any produced batch smaller than `min`
/// is broken up into single-key batches instead: small batches are not
/// routed consistently and are therefore suboptimal for server-side
/// caching, while the resulting singleton requests can be consistently
/// routed and issued in parallel.
///
/// Note: a `batch_size` of `Some(0)` is treated like `Some(1)` (every key
/// in its own batch) rather than panicking.
fn split_into_batches<T>(
    keys: impl IntoIterator<Item = T>,
    batch_size: Option<usize>,
    min_batch_size: Option<usize>,
) -> Vec<Vec<T>> {
    let n = match batch_size {
        Some(n) => n,
        // No batch size limit: everything fits in one batch.
        None => return vec![keys.into_iter().collect()],
    };

    // Chunk the keys into consecutive batches of at most `n` items.
    let mut batches: Vec<Vec<T>> = Vec::new();
    let mut current: Vec<T> = Vec::with_capacity(n);
    for key in keys {
        current.push(key);
        if current.len() >= n {
            batches.push(std::mem::replace(&mut current, Vec::with_capacity(n)));
        }
    }
    if !current.is_empty() {
        batches.push(current);
    }

    // Unbatch undersized batches into single-key batches so they can be
    // consistently routed (see commit summary); full-size batches are
    // kept as-is.
    if let Some(min) = min_batch_size {
        batches = batches
            .into_iter()
            .flat_map(|batch| {
                if batch.len() >= min {
                    vec![batch]
                } else {
                    batch.into_iter().map(|key| vec![key]).collect()
                }
            })
            .collect();
    }

    batches
}
Expand Down Expand Up @@ -1641,6 +1674,34 @@ mod tests {
use anyhow::Result;

use crate::builder::HttpClientBuilder;
use crate::client::split_into_batches;

#[test]
fn test_split_into_batches() -> Result<()> {
    // Without a minimum, keys are simply chunked by the batch size; the
    // trailing chunk may be smaller.
    assert_eq!(
        split_into_batches(vec![1, 2, 3], Some(2), None),
        vec![vec![1, 2], vec![3]]
    );
    assert_eq!(
        split_into_batches(vec![1, 2, 3, 4], Some(2), None),
        vec![vec![1, 2], vec![3, 4]]
    );

    // With a minimum, a trailing chunk below the threshold is exploded
    // into single-key batches.
    assert_eq!(
        split_into_batches(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], Some(4), Some(3)),
        vec![vec![1, 2, 3, 4], vec![5, 6, 7, 8], vec![9], vec![10]]
    );

    // Without a minimum, the same input keeps its small trailing chunk.
    assert_eq!(
        split_into_batches(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], Some(4), None),
        vec![vec![1, 2, 3, 4], vec![5, 6, 7, 8], vec![9, 10]]
    );

    Ok(())
}

#[test]
fn test_url_escaping() -> Result<()> {
Expand Down

0 comments on commit c5d0b0e

Please sign in to comment.