diff --git a/config/services.yml b/config/services.yml index 9c8fd9db9..ff583dbda 100644 --- a/config/services.yml +++ b/config/services.yml @@ -412,7 +412,25 @@ https://cmr.earthdata.nasa.gov: - name: l2-subsetter-batchee-stitchee-concise description: | - Chained Service of the PODAAC L2-subsetter, Batchee, STITCHEE, and PODAAC CONCISE services. + ### Subsetter And Multi-dimensional Batched Aggregation in Harmony (SAMBAH) + Chained Service of the L2-subsetter, Batchee, STITCHEE, and CONCISE services. + Additional documentation [here](https://stitchee.readthedocs.io/en/latest/sambah_readme/). + #### L2 swath subsetter (L2-subsetter) + * Works with trajectory (1D) and along track/across track data. + * Works with netCDF and HDF5 input files. + * Supports variable subsetting. + * Supports temporal subsetting. + * Supports shape subsetting + * Works with hierarchical groups. + * Outputs netCDF4. + #### Batchee + * Service groups together filenames so that further operations (such as concatenation) can be performed separately on each group of files. + #### STITCH by Extending a dimEnsion (Stitchee) + * Service concatenates a group of netCDF data files along an existing dimension. + #### CONCatenation SErvice (CONCISE) + * Service capable of "concatenating" multiple netCDF files into a single netCDF file. + The resulting file has an extra dimension with size equal to the number of input files, where each slice in that dimension corresponds to the data from one of the input files. 
+ data_operation_version: '0.19.0' type: <<: *default-turbo-config @@ -424,13 +442,14 @@ https://cmr.earthdata.nasa.gov: umm_s: S2940253910-LARC_CLOUD capabilities: concatenation: true - concatenate_by_default: true + concatenate_by_default: false extend: true default_extend_dimensions: ['mirror_step'] subsetting: bbox: true variable: true temporal: true + shape: true output_formats: - application/netcdf4 reprojection: false @@ -438,15 +457,19 @@ https://cmr.earthdata.nasa.gov: - image: !Env ${QUERY_CMR_IMAGE} is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} - operations: ['spatialSubset', 'variableSubset', 'temporalSubset'] + operations: ['spatialSubset', 'shapefileSubset', 'variableSubset', 'temporalSubset'] conditional: - exists: ['spatialSubset', 'variableSubset', 'temporalSubset'] + exists: ['spatialSubset', 'shapefileSubset', 'variableSubset', 'temporalSubset'] extra_args: cut: false - image: !Env ${BATCHEE_IMAGE} operations: ['concatenate'] + conditional: + exists: ['concatenate'] - image: !Env ${STITCHEE_IMAGE} operations: ['extend'] + conditional: + exists: ['concatenate'] - image: !Env ${PODAAC_CONCISE_IMAGE} is_batched: true operations: ['concatenate'] @@ -1143,7 +1166,25 @@ https://cmr.uat.earthdata.nasa.gov: - name: l2-subsetter-batchee-stitchee-concise description: | - Chained Service of the PODAAC L2-subsetter, Batchee, STITCHEE, and PODAAC CONCISE services. + ### Subsetter And Multi-dimensional Batched Aggregation in Harmony (SAMBAH) + Chained Service of the L2-subsetter, Batchee, STITCHEE, and CONCISE services. + Additional documentation [here](https://stitchee.readthedocs.io/en/latest/sambah_readme/). + #### L2 swath subsetter (L2-subsetter) + * Works with trajectory (1D) and along track/across track data. + * Works with netCDF and HDF5 input files. + * Supports variable subsetting. + * Supports temporal subsetting. + * Supports shape subsetting + * Works with hierarchical groups. + * Outputs netCDF4. 
+ #### Batchee + * Service groups together filenames so that further operations (such as concatenation) can be performed separately on each group of files. + #### STITCH by Extending a dimEnsion (Stitchee) + * Service concatenates a group of netCDF data files along an existing dimension. + #### CONCatenation SErvice (CONCISE) + * Service capable of "concatenating" multiple netCDF files into a single netCDF file. + The resulting file has an extra dimension with size equal to the number of input files, where each slice in that dimension corresponds to the data from one of the input files. + data_operation_version: '0.19.0' type: <<: *default-turbo-config @@ -1155,13 +1196,14 @@ https://cmr.uat.earthdata.nasa.gov: umm_s: S1262025641-LARC_CLOUD capabilities: concatenation: true - concatenate_by_default: true + concatenate_by_default: false extend: true default_extend_dimensions: ['mirror_step'] subsetting: bbox: true variable: true temporal: true + shape: true output_formats: - application/netcdf4 reprojection: false @@ -1169,15 +1211,19 @@ https://cmr.uat.earthdata.nasa.gov: - image: !Env ${QUERY_CMR_IMAGE} is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} - operations: ['spatialSubset', 'variableSubset', 'temporalSubset'] + operations: ['spatialSubset', 'shapefileSubset', 'variableSubset', 'temporalSubset'] conditional: - exists: ['spatialSubset', 'variableSubset', 'temporalSubset'] + exists: ['spatialSubset', 'shapefileSubset', 'variableSubset', 'temporalSubset'] extra_args: cut: false - image: !Env ${BATCHEE_IMAGE} operations: ['concatenate'] + conditional: + exists: ['concatenate'] - image: !Env ${STITCHEE_IMAGE} operations: ['extend'] + conditional: + exists: ['concatenate'] - image: !Env ${PODAAC_CONCISE_IMAGE} is_batched: true operations: ['concatenate'] diff --git
a/packages/util/env-defaults b/packages/util/env-defaults index 49666b817..9af7110b5 100644 --- a/packages/util/env-defaults +++ b/packages/util/env-defaults @@ -124,8 +124,8 @@ PODAAC_L2_SUBSETTER_SERVICE_QUEUE_URLS='["ghcr.io/podaac/l2ss-py:sit,http://sqs. PODAAC_PS3_SERVICE_QUEUE_URLS='["podaac/podaac-cloud/podaac-shapefile-subsetter:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/podaac-shapefile-subsetter.fifo"]' PODAAC_NETCDF_CONVERTER_SERVICE_QUEUE_URLS='["podaac/podaac-cloud/podaac-netcdf-converter:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/podaac-netcdf-converter.fifo"]' QUERY_CMR_SERVICE_QUEUE_URLS='["harmonyservices/query-cmr:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/query-cmr.fifo"]' -BATCHEE_SERVICE_QUEUE_URLS='["asdc-trade/batchee:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/batchee.fifo"]' -STITCHEE_SERVICE_QUEUE_URLS='["asdc-trade/stitchee:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/stitchee.fifo"]' +BATCHEE_SERVICE_QUEUE_URLS='["ghcr.io/nasa/batchee:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/batchee.fifo"]' +STITCHEE_SERVICE_QUEUE_URLS='["ghcr.io/nasa/stitchee:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/stitchee.fifo"]' # The number of seconds to allow a pod to continue processing an active request before terminating a pod DEFAULT_POD_GRACE_PERIOD_SECS=14400 diff --git a/scripts/service-comparison.ts b/scripts/service-comparison.ts index 71c0459dc..494b8da1e 100644 --- a/scripts/service-comparison.ts +++ b/scripts/service-comparison.ts @@ -8,6 +8,7 @@ import { exit } from 'process'; import { loadServiceConfigs } from '../services/harmony/app/models/services'; import { CmrUmmService, cmrApiConfig, getServicesByIds } from '../services/harmony/app/util/cmr'; import { ServiceConfig } from '../services/harmony/app/models/services/base-service'; +import 
validation from 'ajv/dist/vocabularies/validation'; /** * Validates spatial subsetting configuration matches @@ -167,8 +168,28 @@ async function runComparisons(environments = allEnvironments): Promise { const ummRecord = ummRecordsMap[harmonyConfig.umm_s]; const validationMessages = performValidations(ummRecord, harmonyConfig); if (validationMessages.length > 0) { - exitCode = 1; - console.log(`Validation failures for ${harmonyConfig.name} and ${ummRecord.meta['concept-id']}:\n - ${validationMessages.join('\n - ')}`); + // For SAMBAH we're allowing for a difference in concatenate_by_default since for API + // users they want that to be false, but in EDSC they want the default to be to have the + // box checked. + if (harmonyConfig.name == 'l2-subsetter-batchee-stitchee-concise') { + // only _warn_ about concatenate by default difference - other messages are actual errors + const failureMessages = validationMessages.reduce((acc, message) => { + if (message != 'Concatenate by default mismatch: harmony is false and UMM-S is true.') { + acc.push(message); + } else { + console.log(`WARNING: ${harmonyConfig.name} and ${ummRecord.meta['concept-id']} differ:\n - ${message}`); + } + return acc; + }, []); + + if (failureMessages.length > 0) { + exitCode = 1; + console.log(`ERROR: Validation failures for ${harmonyConfig.name} and ${ummRecord.meta['concept-id']}:\n - ${failureMessages.join('\n - ')}`); + } + } else { + exitCode = 1; + console.log(`ERROR: Validation failures for ${harmonyConfig.name} and ${ummRecord.meta['concept-id']}:\n - ${validationMessages.join('\n - ')}`); + } } } } diff --git a/services/harmony/.nsprc b/services/harmony/.nsprc index 462296805..ba4815ec6 100644 --- a/services/harmony/.nsprc +++ b/services/harmony/.nsprc @@ -4,16 +4,6 @@ "notes": "Will fix in HARMONY-1650", "expiry": "2024-11-01" }, - "1096482": { - "active": true, - "notes": "Will fix in HARMONY-1700", - "expiry": "2024-11-01" - }, - "1096484": { - "active": true, - "notes": "Will fix in 
HARMONY-1700", - "expiry": "2024-11-01" - }, "1096727": { "active": true, "notes": "Will fix in HARMONY-1729", @@ -22,11 +12,11 @@ "1097493": { "active": true, "notes": "ignored because it doesn't affect us and there is not current patch", - "expiry": "2024-09-01" + "expiry": "2024-11-01" }, "1097346": { "active": true, "notes": "ignored because it doesn't affect us and there is not current patch", - "expiry": "2024-09-01" + "expiry": "2024-11-01" } } diff --git a/services/harmony/env-defaults b/services/harmony/env-defaults index 377be561b..17c26ce80 100644 --- a/services/harmony/env-defaults +++ b/services/harmony/env-defaults @@ -489,12 +489,12 @@ SUBSET_BAND_NAME_LIMITS_MEMORY=2048Mi SUBSET_BAND_NAME_INVOCATION_ARGS='python3 /app/harmony_python_interface/adapter.py' SUBSET_BAND_NAME_SERVICE_QUEUE_URLS='["ldds/subset-band-name:latest,http://sqs.us-west-2.localhost.localstack.cloud:4566/000000000000/subset-band-name.fifo"]' -BATCHEE_IMAGE=asdc-trade/batchee:latest +BATCHEE_IMAGE=ghcr.io/nasa/batchee:latest BATCHEE_REQUESTS_MEMORY=128Mi BATCHEE_LIMITS_MEMORY=512Mi BATCHEE_INVOCATION_ARGS='./docker-entrypoint.sh' -STITCHEE_IMAGE=asdc-trade/stitchee:latest +STITCHEE_IMAGE=ghcr.io/nasa/stitchee:latest STITCHEE_REQUESTS_CPU=128m STITCHEE_LIMITS_CPU=128m STITCHEE_REQUESTS_MEMORY=128Mi