From d859ceee34e21d5ad2e8eed64c9e9c40fc1b9c65 Mon Sep 17 00:00:00 2001 From: James Norton Date: Thu, 29 Aug 2024 15:03:38 -0400 Subject: [PATCH 1/3] HARMONY-1789: Change SAMBAH to only concatenate when the `concatenate` flag is set to true and update the description --- config/services.yml | 62 ++++++++++++++++++++++++++++++----- packages/util/env-defaults | 4 +-- scripts/service-comparison.ts | 10 ++++-- services/harmony/env-defaults | 4 +-- 4 files changed, 66 insertions(+), 14 deletions(-) diff --git a/config/services.yml b/config/services.yml index 9c8fd9db9..ff583dbda 100644 --- a/config/services.yml +++ b/config/services.yml @@ -412,7 +412,25 @@ https://cmr.earthdata.nasa.gov: - name: l2-subsetter-batchee-stitchee-concise description: | - Chained Service of the PODAAC L2-subsetter, Batchee, STITCHEE, and PODAAC CONCISE services. + ### Subsetter And Multi-dimensional Batched Aggregation in Harmony (SAMBAH) + Chained Service of the L2-subsetter, Batchee, STITCHEE, and CONCISE services. + Additional documentation [here](https://stitchee.readthedocs.io/en/latest/sambah_readme/). + #### L2 swath subsetter (L2-subsetter) + * Works with trajectory (1D) and along track/across track data. + * Works with netCDF and HDF5 input files. + * Supports variable subsetting. + * Supports temporal subsetting. + * Supports shape subsetting + * Works with hierarchical groups. + * Outputs netCDF4. + #### Batchee + * Service groups together filenames so that further operations (such as concatenation) can be performed separately on each group of files. + #### STITCH by Extending a dimEnsion (Stitchee) + * Service concatenates a group of netCDF data files along an existing dimension. + #### CONCatenation SErvice (CONCISE) + * Service capable of "concatenating" multiple netCDF files into a single netCDF file. + The resulting file has an extra dimension with size equal to the number of input files, where each slice in that dimension corresponds to the data from one of the input files. 
+ data_operation_version: '0.19.0' type: <<: *default-turbo-config @@ -424,13 +442,14 @@ https://cmr.earthdata.nasa.gov: umm_s: S2940253910-LARC_CLOUD capabilities: concatenation: true - concatenate_by_default: true + concatenate_by_default: false extend: true default_extend_dimensions: ['mirror_step'] subsetting: bbox: true variable: true temporal: true + shape: true output_formats: - application/netcdf4 reprojection: false @@ -438,15 +457,19 @@ https://cmr.earthdata.nasa.gov: - image: !Env ${QUERY_CMR_IMAGE} is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} - operations: ['spatialSubset', 'variableSubset', 'temporalSubset'] + operations: ['spatialSubset', 'shapefileSubset', 'variableSubset', 'temporalSubset'] conditional: - exists: ['spatialSubset', 'variableSubset', 'temporalSubset'] + exists: ['spatialSubset', 'shapefileSubset', 'variableSubset', 'temporalSubset'] extra_args: cut: false - image: !Env ${BATCHEE_IMAGE} operations: ['concatenate'] + conditional: + exists: ['concatenate'] - image: !Env ${STITCHEE_IMAGE} operations: ['extend'] + conditional: + exists: ['concatenate'] - image: !Env ${PODAAC_CONCISE_IMAGE} is_batched: true operations: ['concatenate'] @@ -1143,7 +1166,25 @@ https://cmr.uat.earthdata.nasa.gov: - name: l2-subsetter-batchee-stitchee-concise description: | - Chained Service of the PODAAC L2-subsetter, Batchee, STITCHEE, and PODAAC CONCISE services. + ### Subsetter And Multi-dimensional Batched Aggregation in Harmony (SAMBAH) + Chained Service of the L2-subsetter, Batchee, STITCHEE, and CONCISE services. + Additional documentation [here](https://stitchee.readthedocs.io/en/latest/sambah_readme/). + #### L2 swath subsetter (L2-subsetter) + * Works with trajectory (1D) and along track/across track data. + * Works with netCDF and HDF5 input files. + * Supports variable subsetting. + * Supports temporal subsetting. + * Supports shape subsetting + * Works with hierarchical groups. + * Outputs netCDF4. 
+ #### Batchee + * Service groups together filenames so that further operations (such as concatenation) can be performed separately on each group of files. + #### STITCH by Extending a dimEnsion (Stitchee) + * Service concatenates a group of netCDF data files along an existing dimension. + #### CONCatenation SErvice (CONCISE) + * Service capable of "concatenating" multiple netCDF files into a single netCDF file. + The resulting file has an extra dimension with size equal to the number of input files, where each slice in that dimension corresponds to the data from one of the input files. + data_operation_version: '0.19.0' type: <<: *default-turbo-config @@ -1155,13 +1196,14 @@ https://cmr.uat.earthdata.nasa.gov: umm_s: S1262025641-LARC_CLOUD capabilities: concatenation: true - concatenate_by_default: true + concatenate_by_default: false extend: true default_extend_dimensions: ['mirror_step'] subsetting: bbox: true variable: true temporal: true + shape: true output_formats: - application/netcdf4 reprojection: false @@ -1169,15 +1211,19 @@ https://cmr.uat.earthdata.nasa.gov: - image: !Env ${QUERY_CMR_IMAGE} is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} - operations: ['spatialSubset', 'variableSubset', 'temporalSubset'] + operations: ['spatialSubset', 'shapefileSubset', 'variableSubset', 'temporalSubset'] conditional: - exists: ['spatialSubset', 'variableSubset', 'temporalSubset'] + exists: ['spatialSubset', 'shapefileSubset', 'variableSubset', 'temporalSubset'] extra_args: cut: false - image: !Env ${BATCHEE_IMAGE} operations: ['concatenate'] + conditional: + exists: ['concatenate'] - image: !Env ${STITCHEE_IMAGE} operations: ['extend'] + conditional: + exists: ['concatenate'] - image: !Env ${PODAAC_CONCISE_IMAGE} is_batched: true operations: ['concatenate'] diff --git
a/packages/util/env-defaults b/packages/util/env-defaults index 49666b817..9af7110b5 100644 --- a/packages/util/env-defaults +++ b/packages/util/env-defaults @@ -124,8 +124,8 @@ PODAAC_L2_SUBSETTER_SERVICE_QUEUE_URLS='["ghcr.io/podaac/l2ss-py:sit,http://sqs. PODAAC_PS3_SERVICE_QUEUE_URLS='["podaac/podaac-cloud/podaac-shapefile-subsetter:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/podaac-shapefile-subsetter.fifo"]' PODAAC_NETCDF_CONVERTER_SERVICE_QUEUE_URLS='["podaac/podaac-cloud/podaac-netcdf-converter:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/podaac-netcdf-converter.fifo"]' QUERY_CMR_SERVICE_QUEUE_URLS='["harmonyservices/query-cmr:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/query-cmr.fifo"]' -BATCHEE_SERVICE_QUEUE_URLS='["asdc-trade/batchee:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/batchee.fifo"]' -STITCHEE_SERVICE_QUEUE_URLS='["asdc-trade/stitchee:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/stitchee.fifo"]' +BATCHEE_SERVICE_QUEUE_URLS='["ghcr.io/nasa/batchee:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/batchee.fifo"]' +STITCHEE_SERVICE_QUEUE_URLS='["ghcr.io/nasa/stitchee:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/stitchee.fifo"]' # The number of seconds to allow a pod to continue processing an active request before terminating a pod DEFAULT_POD_GRACE_PERIOD_SECS=14400 diff --git a/scripts/service-comparison.ts b/scripts/service-comparison.ts index 71c0459dc..d841aa5e2 100644 --- a/scripts/service-comparison.ts +++ b/scripts/service-comparison.ts @@ -167,8 +167,14 @@ async function runComparisons(environments = allEnvironments): Promise { const ummRecord = ummRecordsMap[harmonyConfig.umm_s]; const validationMessages = performValidations(ummRecord, harmonyConfig); if (validationMessages.length > 0) { - exitCode = 1; - console.log(`Validation failures for 
${harmonyConfig.name} and ${ummRecord.meta['concept-id']}:\n - ${validationMessages.join('\n - ')}`); + // TODO this is a temporary check until the UMM records for this service chain are updated + // to match the changes in services.yml + if (harmonyConfig.name != 'l2-subsetter-batchee-stitchee-concise') { + exitCode = 1; + console.log(`ERROR: Validation failures for ${harmonyConfig.name} and ${ummRecord.meta['concept-id']}:\n - ${validationMessages.join('\n - ')}`); + } else { + console.log(`WARNING: ${harmonyConfig.name} and ${ummRecord.meta['concept-id']} differ:\n - ${validationMessages.join('\n - ')}`); + } } } } diff --git a/services/harmony/env-defaults b/services/harmony/env-defaults index 377be561b..17c26ce80 100644 --- a/services/harmony/env-defaults +++ b/services/harmony/env-defaults @@ -489,12 +489,12 @@ SUBSET_BAND_NAME_LIMITS_MEMORY=2048Mi SUBSET_BAND_NAME_INVOCATION_ARGS='python3 /app/harmony_python_interface/adapter.py' SUBSET_BAND_NAME_SERVICE_QUEUE_URLS='["ldds/subset-band-name:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/subset-band-name.fifo"]' -BATCHEE_IMAGE=asdc-trade/batchee:latest +BATCHEE_IMAGE=ghcr.io/nasa/batchee:latest BATCHEE_REQUESTS_MEMORY=128Mi BATCHEE_LIMITS_MEMORY=512Mi BATCHEE_INVOCATION_ARGS='./docker-entrypoint.sh' -STITCHEE_IMAGE=asdc-trade/stitchee:latest +STITCHEE_IMAGE=ghcr.io/nasa/stitchee:latest STITCHEE_REQUESTS_CPU=128m STITCHEE_LIMITS_CPU=128m STITCHEE_REQUESTS_MEMORY=128Mi From 78b33f734cc4c7779e5c313ae8f605f4004de62f Mon Sep 17 00:00:00 2001 From: James Norton Date: Tue, 3 Sep 2024 10:04:51 -0400 Subject: [PATCH 2/3] HARMONY-1789: Change service comparison to fail for SAMBAH if shapefile support is not the same --- scripts/service-comparison.ts | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/scripts/service-comparison.ts b/scripts/service-comparison.ts index d841aa5e2..494b8da1e 100644 --- a/scripts/service-comparison.ts +++ 
b/scripts/service-comparison.ts @@ -8,6 +8,7 @@ import { exit } from 'process'; import { loadServiceConfigs } from '../services/harmony/app/models/services'; import { CmrUmmService, cmrApiConfig, getServicesByIds } from '../services/harmony/app/util/cmr'; import { ServiceConfig } from '../services/harmony/app/models/services/base-service'; +import validation from 'ajv/dist/vocabularies/validation'; /** * Validates spatial subsetting configuration matches @@ -167,13 +168,27 @@ async function runComparisons(environments = allEnvironments): Promise { const ummRecord = ummRecordsMap[harmonyConfig.umm_s]; const validationMessages = performValidations(ummRecord, harmonyConfig); if (validationMessages.length > 0) { - // TODO this is a temporary check until the UMM records for this service chain are updated - // to match the changes in services.yml - if (harmonyConfig.name != 'l2-subsetter-batchee-stitchee-concise') { + // For SAMBAH we're allowing for a difference in concatenate_by_default since for API + // users they want that to be false, but in EDSC they want the default to be to have the + // box checked. 
+ if (harmonyConfig.name == 'l2-subsetter-batchee-stitchee-concise') { + // only _warn_ about concatenate by default difference - other messages are actual errors + const failureMessages = validationMessages.reduce((acc, message) => { + if (message != 'Concatenate by default mismatch: harmony is false and UMM-S is true.') { + acc.push(message); + } else { + console.log(`WARNING: ${harmonyConfig.name} and ${ummRecord.meta['concept-id']} differ:\n - ${message}`); + } + return acc; + }, []); + + if (failureMessages.length > 0) { + exitCode = 1; + console.log(`ERROR: Validation failures for ${harmonyConfig.name} and ${ummRecord.meta['concept-id']}:\n - ${failureMessages.join('\n - ')}`); + } + } else { exitCode = 1; console.log(`ERROR: Validation failures for ${harmonyConfig.name} and ${ummRecord.meta['concept-id']}:\n - ${validationMessages.join('\n - ')}`); - } else { - console.log(`WARNING: ${harmonyConfig.name} and ${ummRecord.meta['concept-id']} differ:\n - ${validationMessages.join('\n - ')}`); } } } From e155a3383f23a804719af1d8912c4ee7d763268e Mon Sep 17 00:00:00 2001 From: James Norton Date: Tue, 3 Sep 2024 13:16:35 -0400 Subject: [PATCH 3/3] HARMONY-1789: Clean up .nsprc --- services/harmony/.nsprc | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/services/harmony/.nsprc b/services/harmony/.nsprc index 462296805..ba4815ec6 100644 --- a/services/harmony/.nsprc +++ b/services/harmony/.nsprc @@ -4,16 +4,6 @@ "notes": "Will fix in HARMONY-1650", "expiry": "2024-11-01" }, - "1096482": { - "active": true, - "notes": "Will fix in HARMONY-1700", - "expiry": "2024-11-01" - }, - "1096484": { - "active": true, - "notes": "Will fix in HARMONY-1700", - "expiry": "2024-11-01" - }, "1096727": { "active": true, "notes": "Will fix in HARMONY-1729", @@ -22,11 +12,11 @@ "1097493": { "active": true, "notes": "ignored because it doesn't affect us and there is not current patch", - "expiry": "2024-09-01" + "expiry": "2024-11-01" }, "1097346": { 
"active": true, "notes": "ignored because it doesn't affect us and there is not current patch", - "expiry": "2024-09-01" + "expiry": "2024-11-01" } }