Add graceful shutdown during DB loading
danielle-tfh committed Jan 16, 2025
1 parent 0549736 commit 48ff95b
Showing 4 changed files with 54 additions and 9 deletions.
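
The diff threads an Arc<ShutdownHandler> through the DB and S3 loading paths and polls it between units of work. For orientation, here is a minimal sketch of the handler surface the changes rely on; the real type lives in iris_mpc_common::helpers::shutdown_handler, so the shape below (a grace-period constructor, a signal listener that registers and returns, and a lock-free flag check) is an assumption, not the actual implementation:

// Assumed sketch of the ShutdownHandler surface used by this commit.
// The real implementation in iris_mpc_common may differ in detail.
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};

pub struct ShutdownHandler {
    shutting_down: Arc<AtomicBool>,
    #[allow(dead_code)]
    grace_period_secs: u64, // hypothetical meaning of `ShutdownHandler::new(60)`
}

impl ShutdownHandler {
    pub fn new(grace_period_secs: u64) -> Self {
        Self {
            shutting_down: Arc::new(AtomicBool::new(false)),
            grace_period_secs,
        }
    }

    // Registers a background listener and returns immediately, which is
    // consistent with the test below awaiting this call without hanging.
    pub async fn wait_for_shutdown_signal(&self) {
        let flag = Arc::clone(&self.shutting_down);
        tokio::spawn(async move {
            if tokio::signal::ctrl_c().await.is_ok() {
                flag.store(true, Ordering::SeqCst);
            }
        });
    }

    // Cheap, lock-free check that loading loops can poll per chunk/partition.
    pub fn is_shutting_down(&self) -> bool {
        self.shutting_down.load(Ordering::SeqCst)
    }
}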
2 changes: 1 addition & 1 deletion .github/workflows/temp-branch-build-and-push.yaml
@@ -3,7 +3,7 @@ name: Branch - Build and push docker image
 on:
   push:
     branches:
-      - "chore/increase-stalled-stream-protection"
+      - "add-graceful-shutdown-during-s3-sync"
 
 concurrency:
   group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
8 changes: 7 additions & 1 deletion iris-mpc-store/src/lib.rs
@@ -12,14 +12,15 @@ use futures::{
 use iris_mpc_common::{
     config::Config,
     galois_engine::degree4::{GaloisRingIrisCodeShare, GaloisRingTrimmedMaskCodeShare},
+    helpers::shutdown_handler::ShutdownHandler,
     iris_db::iris::IrisCode,
 };
 use rand::{rngs::StdRng, Rng, SeedableRng};
 pub use s3_importer::{fetch_and_parse_chunks, last_snapshot_timestamp, ObjectStore, S3Store};
 use sqlx::{
     migrate::Migrator, postgres::PgPoolOptions, Executor, PgPool, Postgres, Row, Transaction,
 };
-use std::ops::DerefMut;
+use std::{ops::DerefMut, sync::Arc};
 
 const APP_NAME: &str = "SMPC";
 const MAX_CONNECTIONS: u32 = 100;
@@ -186,12 +187,17 @@ impl Store {
         &self,
         min_last_modified_at: Option<i64>,
         partitions: usize,
+        shutdown_handler: Arc<ShutdownHandler>,
     ) -> impl Stream<Item = eyre::Result<StoredIris>> + '_ {
         let count = self.count_irises().await.expect("Failed count_irises");
         let partition_size = count.div_ceil(partitions).max(1);
 
         let mut partition_streams = Vec::new();
         for i in 0..partitions {
+            if shutdown_handler.is_shutting_down() {
+                tracing::info!("Shutdown triggered before processing chunk {}", i);
+                break;
+            }
             // we start from ID 1
             let start_id = 1 + partition_size * i;
             let end_id = start_id + partition_size - 1;
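
With the extra parameter, stream_irises_par checks the handler before building each partition stream, so a shutdown during partitioning truncates the set of streams rather than returning an error. A hypothetical call site for the new signature (illustrative only; imports elided, and the parallelism of 8 is arbitrary):

// Hypothetical caller; assumes a `Store` handle and the handler created at startup.
async fn stream_all(store: &Store, shutdown_handler: Arc<ShutdownHandler>) {
    let _stream = store
        .stream_irises_par(None, 8, Arc::clone(&shutdown_handler))
        .await;
}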
24 changes: 21 additions & 3 deletions iris-mpc-store/src/s3_importer.rs
@@ -3,7 +3,9 @@ use async_trait::async_trait;
 use aws_sdk_s3::{primitives::ByteStream, Client};
 use eyre::eyre;
 use futures::{stream, Stream, StreamExt};
-use iris_mpc_common::{IRIS_CODE_LENGTH, MASK_CODE_LENGTH};
+use iris_mpc_common::{
+    helpers::shutdown_handler::ShutdownHandler, IRIS_CODE_LENGTH, MASK_CODE_LENGTH,
+};
 use std::{
     mem,
     pin::Pin,
@@ -236,6 +238,7 @@ pub async fn fetch_and_parse_chunks(
     concurrency: usize,
     prefix_name: String,
     last_snapshot_details: LastSnapshotDetails,
+    shutdown_handler: Arc<ShutdownHandler>,
 ) -> Pin<Box<dyn Stream<Item = eyre::Result<StoredIris>> + Send + '_>> {
     tracing::info!("Generating chunk files using: {:?}", last_snapshot_details);
     let range_size = if last_snapshot_details.chunk_size as usize > MAX_RANGE_SIZE {
@@ -253,7 +256,12 @@
         move |chunk| {
             let counter = total_bytes_clone.clone();
             let prefix_name = prefix_name.clone();
+            let shutdown_handler_clone = shutdown_handler.clone();
             async move {
+                if shutdown_handler_clone.is_shutting_down() {
+                    tracing::info!("Shutdown triggered before processing chunk {}", chunk);
+                    return Err(eyre::eyre!("Shutdown triggered"));
+                }
                 let chunk_id = (chunk / last_snapshot_details.chunk_size)
                     * last_snapshot_details.chunk_size
                     + 1;
@@ -407,6 +415,8 @@ mod tests {
         const MOCK_ENTRIES: usize = 107;
         const MOCK_CHUNK_SIZE: usize = 10;
         let mut store = MockStore::new();
+        let shutdown_handler = Arc::new(ShutdownHandler::new(60));
+        shutdown_handler.wait_for_shutdown_signal().await;
         let n_chunks = MOCK_ENTRIES.div_ceil(MOCK_CHUNK_SIZE);
         for i in 0..n_chunks {
             let start_serial_id = i * MOCK_CHUNK_SIZE + 1;
@@ -423,8 +433,14 @@
             last_serial_id: MOCK_ENTRIES as i64,
             chunk_size: MOCK_CHUNK_SIZE as i64,
         };
-        let mut chunks =
-            fetch_and_parse_chunks(&store, 1, "out".to_string(), last_snapshot_details).await;
+        let mut chunks = fetch_and_parse_chunks(
+            &store,
+            1,
+            "out".to_string(),
+            last_snapshot_details,
+            shutdown_handler,
+        )
+        .await;
         let mut count = 0;
         let mut ids: HashSet<usize> = HashSet::from_iter(1..MOCK_ENTRIES);
         while let Some(chunk) = chunks.next().await {
@@ -436,3 +452,5 @@
         assert!(ids.is_empty());
     }
 }
+
+// TODO: add test
29 changes: 25 additions & 4 deletions iris-mpc/src/bin/server.rs
@@ -972,6 +972,7 @@ async fn server_main(config: Config) -> eyre::Result<()> {
     let load_chunks_parallelism = config.load_chunks_parallelism;
     let db_chunks_bucket_name = config.db_chunks_bucket_name.clone();
     let db_chunks_folder_name = config.db_chunks_folder_name.clone();
+    let download_shutdown_handler = Arc::clone(&shutdown_handler);
 
     let (tx, rx) = oneshot::channel();
     background_tasks.spawn_blocking(move || {
@@ -1051,6 +1052,8 @@ async fn server_main(config: Config) -> eyre::Result<()> {
                 "Initialize iris db: Loading from DB (parallelism: {})",
                 parallelism
             );
+            let s3_shutdown_handler = Arc::clone(&download_shutdown_handler);
+            let post_download_shutdown_handler = Arc::clone(&download_shutdown_handler);
             let s3_store = S3Store::new(s3_client_clone, db_chunks_bucket_name);
             tokio::runtime::Handle::current().block_on(async {
                 let mut stream = match config.enable_s3_importer {
@@ -1074,25 +1077,37 @@
                             load_chunks_parallelism,
                             db_chunks_folder_name,
                             last_snapshot_details,
+                            s3_shutdown_handler,
                         )
                         .await
                         .boxed();
 
                         let stream_db = store
-                            .stream_irises_par(Some(min_last_modified_at), parallelism)
+                            .stream_irises_par(
+                                Some(min_last_modified_at),
+                                parallelism,
+                                download_shutdown_handler,
+                            )
                             .await
                             .boxed();
 
                         select_all(vec![stream_s3, stream_db])
                     }
                     false => {
                         tracing::info!("S3 importer disabled. Fetching only from db");
-                        let stream_db =
-                            store.stream_irises_par(None, parallelism).await.boxed();
+                        let stream_db = store
+                            .stream_irises_par(None, parallelism, download_shutdown_handler)
+                            .await
+                            .boxed();
                         select_all(vec![stream_db])
                     }
                 };
 
+                if post_download_shutdown_handler.is_shutting_down() {
+                    tracing::warn!("Shutdown requested by post_download_shutdown_handler.");
+                    return Err(eyre::eyre!("Shutdown requested"));
+                }
+
                 tracing::info!("Page-lock host memory");
                 let left_codes = actor.left_code_db_slices.code_gr.clone();
                 let right_codes = actor.right_code_db_slices.code_gr.clone();
@@ -1131,6 +1146,12 @@ async fn server_main(config: Config) -> eyre::Result<()> {
                 let mut n_loaded_from_db = 0;
                 let mut n_loaded_from_s3 = 0;
                 while let Some(result) = stream.try_next().await? {
+                    if post_download_shutdown_handler.is_shutting_down() {
+                        tracing::warn!(
+                            "Shutdown requested by post_download_shutdown_handler."
+                        );
+                        return Err(eyre::eyre!("Shutdown requested"));
+                    }
                     time_waiting_for_stream += now_load_summary.elapsed();
                     now_load_summary = Instant::now();
                     let index = result.index();
@@ -1265,7 +1286,7 @@ async fn server_main(config: Config) -> eyre::Result<()> {
     let sns_client_bg = sns_client.clone();
     let config_bg = config.clone();
     let store_bg = store.clone();
-    let shutdown_handler_bg = shutdown_handler.clone();
+    let shutdown_handler_bg = Arc::clone(&shutdown_handler);
     let _result_sender_abort = background_tasks.spawn(async move {
         while let Some(ServerJobResult {
             merged_results,
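
Taken together, server.rs clones the handler once per consumer (the Arc::clone(&shutdown_handler) form makes the reference-count bump explicit) and checks it both after stream construction and on every item pulled from the merged stream. A distilled sketch of that polling pattern, with `stream`, `shutdown_handler`, and `load(item)` standing in for the real variables:

// Illustrative only: bail out of a long-running load as soon as a
// shutdown has been signalled, instead of finishing the whole stream.
while let Some(item) = stream.try_next().await? {
    if shutdown_handler.is_shutting_down() {
        tracing::warn!("Shutdown requested during DB load");
        return Err(eyre::eyre!("Shutdown requested"));
    }
    load(item);
}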
