diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index 26e86c02b9a..28be192bb33 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -1,6 +1,6 @@ # Google Cloud node deployments and tests that run when Rust code or dependencies are modified, # but only on PRs from the ZcashFoundation/zebra repository. -# (External PRs are tested/deployed by mergify.) +# (External PRs are tested/deployed by mergify.) # # 1. `versioning`: Extracts the major version from the release semver. Useful for segregating instances based on major versions. # 2. `build`: Builds a Docker image named `zebrad` with the necessary tags derived from Git. @@ -30,58 +30,80 @@ on: workflow_dispatch: inputs: network: - default: 'Mainnet' - description: 'Network to deploy: Mainnet or Testnet' + default: Mainnet + description: "Network to deploy: Mainnet or Testnet" required: true - log_file: - default: '' - description: 'Log to a file path rather than standard output' + type: choice + options: + - Mainnet + - Testnet + cached_disk_type: + default: tip + description: "Type of cached disk to use" + required: true + type: choice + options: + - tip + - checkpoint + prefer_main_cached_state: + default: false + description: "Prefer cached state from the main branch" + required: false + type: boolean + no_cached_disk: + default: false + description: "Do not use a cached state disk" + required: false + type: boolean no_cache: - description: 'Disable the Docker cache for this build' + description: "Disable the Docker cache for this build" required: false type: boolean default: false + log_file: + default: "" + description: "Log to a file path rather than standard output" push: - # Skip main branch updates where Rust code and dependencies aren't modified. 
- branches: - - main - paths: - # code and tests - - '**/*.rs' - # hard-coded checkpoints and proptest regressions - - '**/*.txt' - # dependencies - - '**/Cargo.toml' - - '**/Cargo.lock' - # configuration files - - '.cargo/config.toml' - - '**/clippy.toml' - # workflow definitions - - 'docker/**' - - '.dockerignore' - - '.github/workflows/cd-deploy-nodes-gcp.yml' - - '.github/workflows/sub-build-docker-image.yml' + # Skip main branch updates where Rust code and dependencies aren't modified. + branches: + - main + paths: + # code and tests + - "**/*.rs" + # hard-coded checkpoints and proptest regressions + - "**/*.txt" + # dependencies + - "**/Cargo.toml" + - "**/Cargo.lock" + # configuration files + - ".cargo/config.toml" + - "**/clippy.toml" + # workflow definitions + - "docker/**" + - ".dockerignore" + - ".github/workflows/cd-deploy-nodes-gcp.yml" + - ".github/workflows/sub-build-docker-image.yml" # Only runs the Docker image tests, doesn't deploy any instances pull_request: # Skip PRs where Rust code and dependencies aren't modified. 
paths: # code and tests - - '**/*.rs' + - "**/*.rs" # hard-coded checkpoints and proptest regressions - - '**/*.txt' + - "**/*.txt" # dependencies - - '**/Cargo.toml' - - '**/Cargo.lock' + - "**/Cargo.toml" + - "**/Cargo.lock" # configuration files - - '.cargo/config.toml' - - '**/clippy.toml' + - ".cargo/config.toml" + - "**/clippy.toml" # workflow definitions - - 'docker/**' - - '.dockerignore' - - '.github/workflows/cd-deploy-nodes-gcp.yml' - - '.github/workflows/sub-build-docker-image.yml' + - "docker/**" + - ".dockerignore" + - ".github/workflows/cd-deploy-nodes-gcp.yml" + - ".github/workflows/sub-build-docker-image.yml" release: types: @@ -144,11 +166,11 @@ jobs: needs: build uses: ./.github/workflows/sub-test-zebra-config.yml with: - test_id: 'default-conf' + test_id: "default-conf" docker_image: ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} grep_patterns: '-e "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"' - test_variables: '-e NETWORK' - network: 'Mainnet' + test_variables: "-e NETWORK" + network: "Mainnet" # Test reconfiguring the docker image for testnet. 
test-configuration-file-testnet: @@ -157,11 +179,11 @@ jobs: # Make sure Zebra can sync the genesis block on testnet uses: ./.github/workflows/sub-test-zebra-config.yml with: - test_id: 'testnet-conf' + test_id: "testnet-conf" docker_image: ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} grep_patterns: '-e "net.*=.*Test.*estimated progress to chain tip.*Genesis" -e "net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter"' - test_variables: '-e NETWORK' - network: 'Testnet' + test_variables: "-e NETWORK" + network: "Testnet" # Finds a `tip` cached state disk for zebra from the main branch # @@ -170,11 +192,12 @@ jobs: get-disk-name: name: Get disk name uses: ./.github/workflows/sub-find-cached-disks.yml + if: ${{ !inputs.no_cached_disk }} with: network: ${{ inputs.network || vars.ZCASH_NETWORK }} disk_prefix: zebrad-cache - disk_suffix: tip - prefer_main_cached_state: true + disk_suffix: ${{ inputs.cached_disk_type || 'tip' }} + prefer_main_cached_state: ${{ inputs.prefer_main_cached_state || (github.event_name == 'push' && github.ref_name == 'main' && true) || false }} # Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet, # with one node in the configured GCP region. 
@@ -196,14 +219,21 @@ jobs: matrix: network: [Mainnet, Testnet] name: Deploy ${{ matrix.network }} nodes - needs: [ build, versioning, test-configuration-file, test-zebra-conf-path, get-disk-name ] + needs: + [ + build, + versioning, + test-configuration-file, + test-zebra-conf-path, + get-disk-name, + ] runs-on: ubuntu-latest timeout-minutes: 60 env: CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }} permissions: - contents: 'read' - id-token: 'write' + contents: "read" + id-token: "write" if: ${{ !cancelled() && !failure() && ((github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release') }} steps: @@ -232,20 +262,20 @@ jobs: id: auth uses: google-github-actions/auth@v2.1.6 with: - workload_identity_provider: '${{ vars.GCP_WIF }}' - service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}' + workload_identity_provider: "${{ vars.GCP_WIF }}" + service_account: "${{ vars.GCP_DEPLOYMENTS_SA }}" - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2.1.1 - # TODO we should implement the fixes from https://github.com/ZcashFoundation/zebra/pull/5670 here - # but the implementation is failing as it's requiring the disk names, contrary to what is stated in the official documentation - name: Create instance template for ${{ matrix.network }} run: | - NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" - DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd" + DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" + DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd" if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}" + elif [ "${{ inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }}" = "true" ]; then + echo "No cached disk required" else echo "No cached disk found for ${{ matrix.network 
}} in main branch" exit 1 @@ -258,7 +288,7 @@ jobs: --image-family=cos-stable \ --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ --create-disk="${DISK_PARAMS}" \ - --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \ + --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \ --container-stdin \ --container-tty \ --container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \ @@ -306,15 +336,16 @@ jobs: # Note: this instances are not automatically replaced or deleted deploy-instance: name: Deploy single ${{ inputs.network }} instance - needs: [ build, test-configuration-file, test-zebra-conf-path, get-disk-name ] + needs: [build, test-configuration-file, test-zebra-conf-path, get-disk-name] runs-on: ubuntu-latest timeout-minutes: 30 env: CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }} permissions: - contents: 'read' - id-token: 'write' - if: github.event_name == 'workflow_dispatch' + contents: "read" + id-token: "write" + # Run even if we don't need a cached disk, but only when triggered by a workflow_dispatch + if: ${{ !failure() && github.event_name == 'workflow_dispatch' }} steps: - uses: actions/checkout@v4.2.1 @@ -342,8 +373,8 @@ jobs: id: auth uses: google-github-actions/auth@v2.1.6 with: - workload_identity_provider: '${{ vars.GCP_WIF }}' - service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}' + workload_identity_provider: "${{ vars.GCP_WIF }}" + service_account: "${{ vars.GCP_DEPLOYMENTS_SA }}" - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2.1.1 @@ -351,10 +382,12 @@ jobs: # Create instance template from container image - name: Manual deploy of a single ${{ inputs.network }} instance running zebrad run: | - NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" - DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd" + DISK_NAME="zebrad-cache-${{ 
env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" + DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd" if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}" + elif [ "${{ inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }}" = "true" ]; then + echo "No cached disk required" else - echo "No cached disk found for ${{ matrix.network }} in main branch" + echo "No cached disk found for ${{ inputs.network }} in main branch" exit 1 @@ -367,7 +400,7 @@ jobs: --image-family=cos-stable \ --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ --create-disk="${DISK_PARAMS}" \ - --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \ + --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \ --container-stdin \ --container-tty \ --container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \ @@ -382,7 +415,7 @@ jobs: failure-issue: name: Open or update issues for release failures # When a new job is added to this workflow, add it to this list. - needs: [ versioning, build, deploy-nodes, deploy-instance ] + needs: [versioning, build, deploy-nodes, deploy-instance] # Only open tickets for failed or cancelled jobs that are not coming from PRs. # (PR statuses are already reported in the PR jobs list, and checked by Mergify.) 
if: (failure() && github.event.pull_request == null) || (cancelled() && github.event.pull_request == null) diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml index 4fb148e1b38..b9966de9058 100644 --- a/.github/workflows/ci-lint.yml +++ b/.github/workflows/ci-lint.yml @@ -93,7 +93,7 @@ jobs: run: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=stable --profile=default - - uses: Swatinem/rust-cache@v2.7.3 + - uses: Swatinem/rust-cache@v2.7.5 with: shared-key: "clippy-cargo-lock" @@ -138,7 +138,7 @@ jobs: # We don't cache `fmt` outputs because the job is quick, # and we want to use the limited GitHub actions cache space for slower jobs. - #- uses: Swatinem/rust-cache@v2.7.3 + #- uses: Swatinem/rust-cache@v2.7.5 - run: | cargo fmt --all -- --check diff --git a/.github/workflows/ci-unit-tests-os.yml b/.github/workflows/ci-unit-tests-os.yml index 372cef69218..7c194c51c5e 100644 --- a/.github/workflows/ci-unit-tests-os.yml +++ b/.github/workflows/ci-unit-tests-os.yml @@ -112,7 +112,7 @@ jobs: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=${{ matrix.rust }} --profile=minimal - - uses: Swatinem/rust-cache@v2.7.3 + - uses: Swatinem/rust-cache@v2.7.5 # TODO: change Rust cache target directory on Windows, # or remove this workaround once the build is more efficient (#3005). 
#with: @@ -221,7 +221,7 @@ jobs: run: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=stable --profile=minimal - - uses: Swatinem/rust-cache@v2.7.3 + - uses: Swatinem/rust-cache@v2.7.5 with: shared-key: "clippy-cargo-lock" diff --git a/.github/workflows/docs-deploy-firebase.yml b/.github/workflows/docs-deploy-firebase.yml index 9da2c869f94..1c8ce7fd773 100644 --- a/.github/workflows/docs-deploy-firebase.yml +++ b/.github/workflows/docs-deploy-firebase.yml @@ -155,7 +155,7 @@ jobs: run: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=beta --profile=default - - uses: Swatinem/rust-cache@v2.7.3 + - uses: Swatinem/rust-cache@v2.7.5 - name: Build internal docs run: | diff --git a/.github/workflows/scripts/gcp-get-cached-disks.sh b/.github/workflows/scripts/gcp-get-cached-disks.sh index 0f38addf10f..9716dc9f5a7 100755 --- a/.github/workflows/scripts/gcp-get-cached-disks.sh +++ b/.github/workflows/scripts/gcp-get-cached-disks.sh @@ -3,9 +3,9 @@ # This script finds a cached Google Cloud Compute image based on specific criteria. # # If there are multiple disks: -# - prefer images generated from the same commit, then -# - if prefer_main_cached_state is true, prefer images from the `main` branch, then -# - use any images from any other branch or commit. +# - if `PREFER_MAIN_CACHED_STATE` is "true", then select an image from the `main` branch, else +# - try to find a cached disk image from the current branch (or PR), else +# - try to find an image from any branch. # # Within each of these categories: # - prefer newer images to older images @@ -20,7 +20,7 @@ echo "Extracting local state version..." 
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1) echo "STATE_VERSION: ${LOCAL_STATE_VERSION}" -# Function to find a cached disk image based on the git pattern (commit, main, or any branch) +# Function to find a cached disk image based on the git pattern (branch, main, or any branch) find_cached_disk_image() { local git_pattern="${1}" local git_source="${2}" @@ -34,40 +34,36 @@ find_cached_disk_image() { echo "Found ${git_source} Disk: ${disk_name}" >&2 disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)") echo "Description: ${disk_description}" >&2 - echo "${disk_name}" # This is the actual return value when a disk is found + echo "${disk_name}" # This is the actual return value when a disk is found else - echo "No ${git_source} disk found." >&2 + echo "No ${git_source} disk found with '${disk_search_pattern}' pattern." >&2 fi } -# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image +# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to +# find a cached disk image. if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then # Find the most suitable cached disk image - echo "Finding the most suitable cached disk image..." + echo "Finding a ${DISK_PREFIX}-${DISK_SUFFIX} disk image for ${NETWORK}..." 
CACHED_DISK_NAME="" - # First, try to find a cached disk image from the current commit - CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_SHA_SHORT}" "commit") - - # If no cached disk image is found - if [[ -z "${CACHED_DISK_NAME}" ]]; then - # Check if main branch images are preferred - if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then - CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch") - # Else, try to find one from any branch - else - CACHED_DISK_NAME=$(find_cached_disk_image ".+-[0-9a-f]+" "any branch") - fi + # Try to find an image based on the `main` branch if that branch is preferred. + if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then + CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch") fi + # If no image was found, try to find one from the current branch (or PR). + CACHED_DISK_NAME=${CACHED_DISK_NAME:-$(find_cached_disk_image ".+-${GITHUB_REF}" "branch")} + # If we still have no image, try to find one from any branch. + CACHED_DISK_NAME=${CACHED_DISK_NAME:-$(find_cached_disk_image ".+-[0-9a-f]+" "any branch")} - # Handle case where no suitable disk image is found + # Handle the case where no suitable disk image is found if [[ -z "${CACHED_DISK_NAME}" ]]; then - echo "No suitable cached state disk available." - echo "Cached state test jobs must depend on the cached state rebuild job." + echo "No suitable cached state disk available. Try running the cached state rebuild job." exit 1 + else + echo "Selected Disk: ${CACHED_DISK_NAME}" fi - echo "Selected Disk: ${CACHED_DISK_NAME}" else echo "DISK_PREFIX or DISK_SUFFIX is not set. Skipping disk image search." 
fi @@ -77,7 +73,6 @@ find_available_disk_type() { local base_name="${1}" local disk_type="${2}" local disk_pattern="${base_name}-cache" - local output_var="${base_name}_${disk_type}_disk" local disk_name disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_pattern}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${disk_type}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) @@ -87,10 +82,10 @@ find_available_disk_type() { echo "Found ${disk_type^^} disk: ${disk_name} for ${base_name^^} on network: ${NETWORK}" >&2 disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)") echo "Description: ${disk_description}" >&2 - echo "true" # This is the actual return value when a disk is found + echo "true" # This is the actual return value when a disk is found else echo "No ${disk_type^^} disk found for ${base_name^^} on network: ${NETWORK}" >&2 - echo "false" # This is the actual return value when no disk is found + echo "false" # This is the actual return value when no disk is found fi } if [[ -n "${NETWORK}" ]]; then diff --git a/.github/workflows/sub-deploy-integration-tests-gcp.yml b/.github/workflows/sub-deploy-integration-tests-gcp.yml index fb880a8a369..7266f60ea54 100644 --- a/.github/workflows/sub-deploy-integration-tests-gcp.yml +++ b/.github/workflows/sub-deploy-integration-tests-gcp.yml @@ -654,6 +654,7 @@ jobs: # (This is unlikely, because each image created by a workflow has a different name.) # # The image name must also be 63 characters or less. + # More info: https://cloud.google.com/compute/docs/naming-resources#resource-name-format # # Force the image creation (--force) as the disk is still attached even though is not being # used by the container. 
diff --git a/.github/workflows/sub-find-cached-disks.yml b/.github/workflows/sub-find-cached-disks.yml index c936d65f8bd..a71237887e2 100644 --- a/.github/workflows/sub-find-cached-disks.yml +++ b/.github/workflows/sub-find-cached-disks.yml @@ -74,20 +74,30 @@ jobs: - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2.1.1 + # Performs formatting on disk name components. + # # Disk images in GCP are required to be in lowercase, but the blockchain network - # uses sentence case, so we need to downcase ${{ inputs.network }} + # uses sentence case, so we need to downcase ${{ inputs.network }}. + # + # Disk image names in GCP are limited to 63 characters, so we need to limit + # branch names to 12 characters. + # Check the `create-state-image` in `sub-deploy-integration-tests-gcp.yml` for more details in image names. + # More info: https://cloud.google.com/compute/docs/naming-resources#resource-name-format # - # Passes a lowercase Network name to subsequent steps using $NETWORK env variable - - name: Downcase network name for disks + # Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable. + # Passes ${{ env.GITHUB_REF_SLUG_URL }} to subsequent steps using $SHORT_GITHUB_REF env variable. + - name: Format network name and branch name for disks run: | - NETWORK_CAPS=${{ inputs.network }} - echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV + NETWORK_CAPS="${{ inputs.network }}" + echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV" + LONG_GITHUB_REF="${{ env.GITHUB_REF_SLUG_URL }}" + echo "SHORT_GITHUB_REF=${LONG_GITHUB_REF:0:12}" >> "$GITHUB_ENV" # Check if there are cached state disks available for subsequent jobs to use. - name: Check if cached state disks exists id: get-available-disks env: - GITHUB_SHA_SHORT: ${{ env.GITHUB_SHA_SHORT }} + GITHUB_REF: ${{ env.SHORT_GITHUB_REF }} NETWORK: ${{ env.NETWORK }} # use lowercase version from env, not input DISK_PREFIX: ${{ inputs.disk_prefix }} DISK_SUFFIX: ${{ inputs.disk_suffix }}