diff --git a/eden/scm/lib/edenapi/src/builder.rs b/eden/scm/lib/edenapi/src/builder.rs index 5aef2c27cabe4..05ae384fe2ff6 100644 --- a/eden/scm/lib/edenapi/src/builder.rs +++ b/eden/scm/lib/edenapi/src/builder.rs @@ -117,6 +117,7 @@ pub struct HttpClientBuilder { max_location_to_hash: Option, max_commit_mutations: Option, max_commit_translate_id: Option, + min_batch_size: Option, timeout: Option, debug: bool, http_version: Option, @@ -171,6 +172,7 @@ impl HttpClientBuilder { get_config(config, "edenapi", "try-route-consistently")?.unwrap_or_default(); let max_history = get_config(config, "edenapi", "maxhistory")?; let max_location_to_hash = get_config(config, "edenapi", "maxlocationtohash")?; + let min_batch_size = get_config(config, "edenapi", "min-batch-size")?; let max_commit_mutations = get_config(config, "edenapi", "maxcommitmutations")?; let max_commit_translate_id = get_config(config, "edenapi", "maxcommittranslateid")?; let timeout = get_config(config, "edenapi", "timeout")?.map(Duration::from_secs); @@ -218,6 +220,7 @@ impl HttpClientBuilder { max_location_to_hash, max_commit_mutations, max_commit_translate_id, + min_batch_size, timeout, debug, http_version, @@ -368,6 +371,7 @@ pub(crate) struct Config { pub(crate) max_location_to_hash: Option, pub(crate) max_commit_mutations: Option, pub(crate) max_commit_translate_id: Option, + pub(crate) min_batch_size: Option, pub(crate) timeout: Option, #[allow(dead_code)] pub(crate) debug: bool, @@ -394,6 +398,7 @@ impl TryFrom for Config { max_location_to_hash, max_commit_mutations, max_commit_translate_id, + min_batch_size, timeout, debug, http_version, @@ -431,6 +436,7 @@ impl TryFrom for Config { max_location_to_hash, max_commit_mutations, max_commit_translate_id, + min_batch_size, timeout, debug, http_version, diff --git a/eden/scm/lib/edenapi/src/client.rs b/eden/scm/lib/edenapi/src/client.rs index 5c246f1f0f85b..c24180cab27d8 100644 --- a/eden/scm/lib/edenapi/src/client.rs +++ b/eden/scm/lib/edenapi/src/client.rs @@ -263,6 +263,7 @@ impl Client { url: &Url, keys: K, batch_size: Option, + min_batch_size: Option, mut make_req: F, mut mutate_url: G, ) -> Result, EdenApiError> @@ -272,7 +273,7 @@ impl Client { G: FnMut(&Url, &Vec) -> Url, R: ToWire, { - split_into_batches(keys, batch_size) + split_into_batches(keys, batch_size, min_batch_size) .into_iter() .map(|keys| { let url = mutate_url(url, &keys); @@ -457,11 +458,13 @@ impl Client { let url = self.build_url(paths::TREES)?; let try_route_consistently = self.config().try_route_consistently; + let min_batch_size: Option = self.config().min_batch_size; let requests = self.prepare_requests( &url, keys, self.config().max_trees, + min_batch_size, |keys| { let req = TreeRequest { keys, @@ -499,11 +502,13 @@ impl Client { let url = self.build_url(paths::FILES2)?; let try_route_consistently = self.config().try_route_consistently; + let min_batch_size: Option = self.config().min_batch_size; let requests = self.prepare_requests( &url, reqs, self.config().max_files, + min_batch_size, |reqs| { let req = FileRequest { reqs, keys: vec![] }; self.log_request(&req, "files"); @@ -739,11 +744,13 @@ impl Client { let url = self.build_url(paths::HISTORY)?; let try_route_consistently = self.config().try_route_consistently; + let min_batch_size: Option = self.config().min_batch_size; let requests = self.prepare_requests( &url, keys, self.config().max_history, + min_batch_size, |keys| { let req = HistoryRequest { keys, length }; self.log_request(&req, "history"); @@ -781,6 +788,7 @@ impl Client { &url, files, Some(MAX_CONCURRENT_BLAMES_PER_REQUEST), + None, |files| { let req = BlameRequest { files }; self.log_request(&req, "blame"); @@ -809,6 +817,7 @@ impl Client { &url, commits, self.config().max_commit_translate_id, + None, |commits| { let req = CommitTranslateIdRequest { commits, @@ -934,6 +943,7 @@ impl Client { &url, items, Some(MAX_CONCURRENT_UPLOAD_FILENODES_PER_REQUEST), + None, |ids| Batch::<_> { batch: ids .into_iter() @@ -960,6 +970,7 @@ impl Client { &url, items, Some(MAX_CONCURRENT_UPLOAD_TREES_PER_REQUEST), + None, |ids| Batch::<_> { batch: ids .into_iter() @@ -1082,6 +1093,7 @@ impl EdenApi for Client { &url, prefixes, Some(MAX_CONCURRENT_HASH_LOOKUPS_PER_REQUEST), + None, |prefixes| Batch::<_> { batch: prefixes }, |url, _keys| url.clone(), )?; @@ -1197,6 +1209,7 @@ impl EdenApi for Client { &url, requests, self.config().max_location_to_hash, + None, |requests| { let batch = CommitLocationToHashRequestBatch { requests }; self.log_request(&batch, "commit_location_to_hash"); @@ -1229,6 +1242,7 @@ impl EdenApi for Client { &url, hgids, self.config().max_location_to_hash, + None, |hgids| { let batch = CommitHashToLocationRequestBatch { master_heads: master_heads.clone(), @@ -1356,6 +1370,7 @@ impl EdenApi for Client { &url, items, Some(MAX_CONCURRENT_LOOKUPS_PER_REQUEST), + None, |ids| Batch:: { batch: ids .into_iter() @@ -1530,6 +1545,7 @@ impl EdenApi for Client { &url, commits, self.config().max_commit_mutations, + None, |commits| { let req = CommitMutationsRequest { commits }; self.log_request(&req, "commit_mutations"); @@ -1571,14 +1587,31 @@ impl EdenApi for Client { fn split_into_batches( keys: impl IntoIterator, batch_size: Option, + min_batch_size: Option, ) -> Vec> { match batch_size { - Some(n) => keys - .into_iter() - .chunks(n) - .into_iter() - .map(Vec::from_iter) - .collect(), + Some(n) => { + let mut chunks_vec = Vec::new(); + for chunk in keys.into_iter().chunks(n).into_iter() { + let v = Vec::from_iter(chunk); + // This bit is used to not construct small batches + // because they are not routed consistently and + // because of that are subuptimal. + if let Some(min_batch_size) = min_batch_size { + if v.len() >= min_batch_size { + chunks_vec.push(v); + } else { + for key in v.into_iter() { + chunks_vec.push(vec![key]); + } + } + } else { + chunks_vec.push(v); + } + } + + chunks_vec + } None => vec![keys.into_iter().collect()], } } @@ -1641,6 +1674,34 @@ mod tests { use anyhow::Result; use crate::builder::HttpClientBuilder; + use crate::client::split_into_batches; + + #[test] + fn test_split_into_batches() -> Result<()> { + let keys = vec![1, 2, 3]; + let result = split_into_batches(keys, Some(2), None); + assert_eq!(vec![vec![1, 2], vec![3]], result); + + let keys = vec![1, 2, 3, 4]; + let result = split_into_batches(keys, Some(2), None); + assert_eq!(vec![vec![1, 2], vec![3, 4]], result); + + let keys = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let result = split_into_batches(keys, Some(4), Some(3)); + assert_eq!( + vec![vec![1, 2, 3, 4], vec![5, 6, 7, 8], vec![9], vec![10]], + result + ); + + let keys = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let result = split_into_batches(keys, Some(4), None); + assert_eq!( + vec![vec![1, 2, 3, 4], vec![5, 6, 7, 8], vec![9, 10]], + result + ); + + Ok(()) + } #[test] fn test_url_escaping() -> Result<()> {