From 3251d15bc45b368c7569dc0d6b176fc06e47ab20 Mon Sep 17 00:00:00 2001
From: Tim Geoghegan
Date: Thu, 20 Jun 2024 14:14:58 -0700
Subject: [PATCH] `docker.yml`: run demo script in CI (#1126)

Add steps to the `compose` job in `docker.yml` that run the demo script
from `cli/README.md` against a Docker Compose deployment to ensure that
it works.

Perhaps unsurprisingly, adding a test for this revealed a couple of
bugs, which are also addressed in this change:

- The task discovery interval and aggregation job creation intervals
  were poorly tuned
- The leader was trying to reach the helper at `localhost:9002`, which
  is not routable inside of the Docker Compose network. So we run `nc`
  alongside the leader's Janus processes so that connections to
  `localhost:9002` get redirected to `janus_2_aggregator:8080`, which
  is routable inside Docker Compose.
---
 .github/workflows/docker.yml | 83 ++++++++++++++++++++++++++++++++----
 compose.yaml                 | 36 +++++++++++++----
 2 files changed, 104 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 77772813..aabc03da 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -67,19 +67,86 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-        # This should work without any of the project sources except compose.yaml
         with:
-          sparse-checkout: |
-            compose.yaml
-          # Disable cone mode so we only grab a specific file
-          # https://github.com/actions/checkout?tab=readme-ov-file#fetch-only-a-single-file
-          # https://git-scm.com/docs/git-sparse-checkout
-          sparse-checkout-cone-mode: false
+          path: checkout
+      - uses: dtolnay/rust-toolchain@stable
+      - run: cargo build --package divviup-cli --profile release
+        working-directory: checkout
+      - run: |
+          mkdir compose
+          cp checkout/target/release/divviup compose/
+          cp checkout/compose.yaml compose/
       - name: Compose
         id: compose
         run: docker compose up --wait --wait-timeout 120
+        working-directory: compose
+      - name: Demo script
+        id: demo-script
+        working-directory: compose
+        run: |
+          # Fail fast on errors, unset variables, and failures anywhere in a
+          # pipeline, so a failing CLI call is not masked by a succeeding jq.
+          set -euo pipefail
+
+          export DIVVIUP_API_URL=http://localhost:8080
+          export DIVVIUP_TOKEN=""
+
+          # Assign first, then export: `export VAR=$(cmd)` hides cmd's exit status.
+          DIVVIUP_ACCOUNT_ID=$(./divviup account list | jq -r '.[0].id')
+          export DIVVIUP_ACCOUNT_ID
+          printf 'account ID %s\n' "$DIVVIUP_ACCOUNT_ID"
+
+          AGGREGATOR_LIST=$(./divviup aggregator list)
+          echo "aggregator list:"
+          echo "$AGGREGATOR_LIST"
+
+          # map(select(...)) is used instead of `.[] |= select(...)`, which can skip
+          # elements on jq 1.6 because it deletes entries while iterating.
+          LEADER_ID=$(echo "$AGGREGATOR_LIST" | jq -r 'map(select(.name == "leader")) | .[0].id')
+          printf 'leader ID %s\n' "$LEADER_ID"
+
+          HELPER_ID=$(echo "$AGGREGATOR_LIST" | jq -r 'map(select(.name == "helper")) | .[0].id')
+          printf 'helper ID %s\n' "$HELPER_ID"
+
+          CRED_OUTPUT=$(./divviup collector-credential generate --save)
+          echo "collector credential:"
+          echo "$CRED_OUTPUT"
+
+          # Tolerate jq failures here: a parse error yields an empty value, not an abort.
+          COLLECTOR_CREDENTIAL_PATH="${PWD}/$(echo "$CRED_OUTPUT" | jq -r '.name' 2>/dev/null || echo '').json"
+          printf 'collector credential path %s\n' "$COLLECTOR_CREDENTIAL_PATH"
+
+          COLLECTOR_CREDENTIAL_ID=$(echo "$CRED_OUTPUT" | jq -r '.id' 2>/dev/null || echo '')
+          printf 'collector credential ID %s\n' "$COLLECTOR_CREDENTIAL_ID"
+
+          TASK=$(./divviup task create --name net-promoter-score \
+            --leader-aggregator-id "$LEADER_ID" --helper-aggregator-id "$HELPER_ID" \
+            --collector-credential-id "$COLLECTOR_CREDENTIAL_ID" \
+            --vdaf histogram --categorical-buckets 0,1,2,3,4,5,6,7,8,9,10 \
+            --min-batch-size 100 --max-batch-size 200 --time-precision 60sec)
+          echo "task:"
+          echo "$TASK"
+          TASK_ID=$(echo "$TASK" | jq -r '.id')
+
+          # Buckets are 0 through 10 inclusive, so draw from all 11 values.
+          for i in {1..150}; do
+            measurement=$(( RANDOM % 11 ))
+            ./divviup dap-client upload --task-id "$TASK_ID" --measurement "$measurement"
+          done
+
+          echo "finished uploading measurements"
+
+          # Give the aggregators time to process the reports before collecting.
+          sleep 120
+
+          ./divviup dap-client collect \
+            --task-id "$TASK_ID" \
+            --collector-credential-file "$COLLECTOR_CREDENTIAL_PATH" \
+            --current-batch
+
       - name: Inspect containers
-        if: ${{ failure() && steps.compose.outcome != 'success' }}
+        if: ${{ failure() && (steps.compose.outcome != 'success' || steps.demo-script.outcome != 'success') }}
+        working-directory: compose
         run: |
           docker compose ps --all
           for NAME in `docker compose ps --all --format json | jq -r '.Name'`; do
diff --git a/compose.yaml b/compose.yaml
index 8dd39cf7..33da4b61 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -158,7 +158,19 @@ services:
 
   janus_1_aggregator:
     <<: *janus_common
-    entrypoint: ["/janus_aggregator", "aggregator"]
+    entrypoint:
+      - /bin/sh
+      - -c
+      # Crimes: janus_2_aggregator's DAP API is localhost:9002 in the divviup-api aggregator object,
+      # but that address is only valid in the host network, outside of Docker Compose. But we need
+      # various parts of janus_1 to be able to reach janus_2_aggregator there. So we run nc(1)
+      # alongside the Janus processes so that it listens on localhost:9002 and forwards traffic to
+      # janus_2_aggregator:8080, which _does_ exist on the Docker Compose network.
+      # The Janus binary is started with `exec` so that it replaces the wrapper shell and the
+      # container's stop signal is delivered to Janus itself rather than to /bin/sh.
+      - |
+        nc -p 9002 -lk -e nc janus_2_aggregator 8080 & \
+        exec /janus_aggregator aggregator
     configs:
       - janus_1_aggregator.yaml
     ports:
@@ -178,7 +190,12 @@ services:
 
   janus_1_aggregation_job_driver:
     <<: *janus_common
-    entrypoint: ["/janus_aggregator", "aggregation_job_driver"]
+    entrypoint:
+      - /bin/sh
+      - -c
+      - |
+        nc -p 9002 -lk -e nc janus_2_aggregator 8080 & \
+        exec /janus_aggregator aggregation_job_driver
     configs:
       - janus_1_aggregation_job_driver.yaml
     environment:
@@ -187,7 +204,12 @@ services:
 
   janus_1_collection_job_driver:
     <<: *janus_common
-    entrypoint: ["/janus_aggregator", "collection_job_driver"]
+    entrypoint:
+      - /bin/sh
+      - -c
+      - |
+        nc -p 9002 -lk -e nc janus_2_aggregator 8080 & \
+        exec /janus_aggregator collection_job_driver
     configs:
       - janus_1_collection_job_driver.yaml
     environment:
@@ -281,8 +303,8 @@ configs:
         url: "postgres://postgres@postgres:5432/janus_1"
       health_check_listen_address: "0.0.0.0:8000"
       batch_aggregation_shard_count: 32
-      tasks_update_frequency_secs: 3600
-      aggregation_job_creation_interval_secs: 60
+      tasks_update_frequency_secs: 10
+      aggregation_job_creation_interval_secs: 10
       min_aggregation_job_size: 10
       max_aggregation_job_size: 100
 
@@ -340,8 +362,8 @@ configs:
         url: "postgres://postgres@postgres:5432/janus_2"
       health_check_listen_address: "0.0.0.0:8000"
       batch_aggregation_shard_count: 32
-      tasks_update_frequency_secs: 3600
-      aggregation_job_creation_interval_secs: 60
+      tasks_update_frequency_secs: 10
+      aggregation_job_creation_interval_secs: 10
       min_aggregation_job_size: 10
       max_aggregation_job_size: 100
 