From c4dc93decee5fc747959b444c3e238e9d970d0a2 Mon Sep 17 00:00:00 2001 From: James Norton Date: Thu, 13 Feb 2025 16:56:22 -0500 Subject: [PATCH 1/4] HARMONY-2007: Add is_sequential: true where missing for query-cmr in services.yml and validation for same --- config/services.yml | 3 ++ services/harmony/app/models/job.ts | 1 - services/harmony/app/models/services/index.ts | 26 +++++++++------ services/harmony/test/models/index.ts | 33 ++++++++++++++++++- .../test/resources/services_no_umm_s_prod.yml | 2 ++ .../test/resources/services_no_umm_s_uat.yml | 2 ++ .../services_umm_s_not_string_prod.yml | 2 ++ .../services_umm_s_not_string_uat.yml | 2 ++ .../resources/services_with_colls_prod.yml | 2 ++ .../resources/services_with_colls_uat.yml | 2 ++ 10 files changed, 63 insertions(+), 12 deletions(-) diff --git a/config/services.yml b/config/services.yml index 28b8c4f8f..0314be56d 100644 --- a/config/services.yml +++ b/config/services.yml @@ -429,6 +429,7 @@ https://cmr.earthdata.nasa.gov: reprojection: true steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${HYBIG_IMAGE} - name: harmony/netcdf-to-zarr @@ -576,6 +577,7 @@ https://cmr.earthdata.nasa.gov: reprojection: true steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${OPERA_RTC_S1_BROWSE_IMAGE} - image: !Env ${HYBIG_IMAGE} @@ -1385,5 +1387,6 @@ https://cmr.uat.earthdata.nasa.gov: reprojection: true steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${OPERA_RTC_S1_BROWSE_IMAGE} - image: !Env ${HYBIG_IMAGE} diff --git a/services/harmony/app/models/job.ts b/services/harmony/app/models/job.ts index 0ad4f90d9..cab27e9e2 100644 --- a/services/harmony/app/models/job.ts +++ b/services/harmony/app/models/job.ts @@ -194,7 +194,6 @@ const stateMachine = createMachine( active: true, }, on: Object.fromEntries([ - [JobEvent.COMPLETE, { target: JobStatus.SUCCESSFUL }], [JobEvent.COMPLETE_WITH_ERRORS, { target: JobStatus.COMPLETE_WITH_ERRORS }], 
[JobEvent.CANCEL, { target: JobStatus.CANCELED }], [JobEvent.FAIL, { target: JobStatus.FAILED }], diff --git a/services/harmony/app/models/services/index.ts b/services/harmony/app/models/services/index.ts index deb1c2759..98410e577 100644 --- a/services/harmony/app/models/services/index.ts +++ b/services/harmony/app/models/services/index.ts @@ -1,20 +1,21 @@ import * as fs from 'fs'; -import * as path from 'path'; import * as yaml from 'js-yaml'; import _, { get as getIn } from 'lodash'; +import * as path from 'path'; + +import { Conjunction, isInteger, listToText } from '@harmony/util/string'; -import logger from '../../util/log'; -import { HttpError, NotFoundError, ServerError } from '../../util/errors'; -import { isMimeTypeAccepted, allowsAny } from '../../util/content-negotiation'; -import { CmrCollection } from '../../util/cmr'; import { addCollectionsToServicesByAssociation } from '../../middleware/service-selection'; -import { listToText, Conjunction, isInteger } from '@harmony/util/string'; -import TurboService from './turbo-service'; -import HttpService from './http-service'; +import { CmrCollection } from '../../util/cmr'; +import { allowsAny, isMimeTypeAccepted } from '../../util/content-negotiation'; +import env from '../../util/env'; +import { HttpError, NotFoundError, ServerError } from '../../util/errors'; +import logger from '../../util/log'; import DataOperation from '../data-operation'; -import BaseService, { ServiceConfig } from './base-service'; import RequestContext from '../request-context'; -import env from '../../util/env'; +import BaseService, { ServiceConfig } from './base-service'; +import HttpService from './http-service'; +import TurboService from './turbo-service'; let serviceConfigs: ServiceConfig[] = null; @@ -109,6 +110,11 @@ function validateServiceConfigSteps(config: ServiceConfig): void { + `Configured to use ${maxBatchInputs}, but will be limited to ${env.maxGranuleLimit}`); } } + if 
(step.image.match(/harmonyservices\/query\-cmr:.*/)) { + if (!step.is_sequential) { + throw new TypeError(`Invalid is_sequential ${step.is_sequential}. query-cmr steps must always have sequential = true.`); + } + } } } diff --git a/services/harmony/test/models/index.ts b/services/harmony/test/models/index.ts index 214a4e56b..9fb3e2ac6 100644 --- a/services/harmony/test/models/index.ts +++ b/services/harmony/test/models/index.ts @@ -1,7 +1,10 @@ import { expect } from 'chai'; -import { loadServiceConfigs, loadServiceConfigsFromFile, getServiceConfigs, validateServiceConfig } from '../../app/models/services'; import _ from 'lodash'; +import { + getServiceConfigs, loadServiceConfigs, loadServiceConfigsFromFile, validateServiceConfig, +} from '../../app/models/services'; + const cmrEndpoints = { 'uat': 'https://cmr.uat.earthdata.nasa.gov', 'prod': 'https://cmr.earthdata.nasa.gov', @@ -74,4 +77,32 @@ describe('Services.yml validation', function () { expect(() => configs.forEach(validateServiceConfig)).to.throw(/Collections cannot be configured for harmony service: with-collections, use umm_s instead./); }); }); + + describe('services.yml with unset is_sequential for query-cmr in UAT is invalid', function () { + it('throws an exception', function () { + const configs = loadServiceConfigsFromFile(cmrEndpoints.uat, '../../../test/resources/services_with_unset_is_sequential_query_cmr_uat.yml'); + expect(() => configs.forEach(validateServiceConfig)).to.throw(/Invalid is_sequential undefined. query-cmr steps must always have sequential = true./); + }); + }); + + describe('services.yml with unset is_sequential for query-cmr in PROD is invalid', function () { + it('throws an exception', function () { + const configs = loadServiceConfigsFromFile(cmrEndpoints.prod, '../../../test/resources/services_with_unset_is_sequential_query_cmr_prod.yml'); + expect(() => configs.forEach(validateServiceConfig)).to.throw(/Invalid is_sequential undefined. 
query-cmr steps must always have sequential = true./); + }); + }); + + describe('services.yml with false is_sequential for query-cmr in UAT is invalid', function () { + it('throws an exception', function () { + const configs = loadServiceConfigsFromFile(cmrEndpoints.uat, '../../../test/resources/services_with_false_is_sequential_query_cmr_uat.yml'); + expect(() => configs.forEach(validateServiceConfig)).to.throw(/Invalid is_sequential false. query-cmr steps must always have sequential = true./); + }); + }); + + describe('services.yml with false is_sequential for query-cmr in PROD is invalid', function () { + it('throws an exception', function () { + const configs = loadServiceConfigsFromFile(cmrEndpoints.prod, '../../../test/resources/services_with_false_is_sequential_query_cmr_prod.yml'); + expect(() => configs.forEach(validateServiceConfig)).to.throw(/Invalid is_sequential false. query-cmr steps must always have sequential = true./); + }); + }); }); diff --git a/services/harmony/test/resources/services_no_umm_s_prod.yml b/services/harmony/test/resources/services_no_umm_s_prod.yml index de1449f8c..85462b525 100644 --- a/services/harmony/test/resources/services_no_umm_s_prod.yml +++ b/services/harmony/test/resources/services_no_umm_s_prod.yml @@ -52,6 +52,7 @@ https://cmr.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} @@ -76,4 +77,5 @@ https://cmr.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} diff --git a/services/harmony/test/resources/services_no_umm_s_uat.yml b/services/harmony/test/resources/services_no_umm_s_uat.yml index 04ee0d622..a96fa4cd9 100644 --- a/services/harmony/test/resources/services_no_umm_s_uat.yml +++ b/services/harmony/test/resources/services_no_umm_s_uat.yml @@ -44,6 +44,7 @@ https://cmr.uat.earthdata.nasa.gov: - application/x-netcdf4 steps: 
- image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} - name: podaac/l2-subsetter @@ -75,5 +76,6 @@ https://cmr.uat.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} diff --git a/services/harmony/test/resources/services_umm_s_not_string_prod.yml b/services/harmony/test/resources/services_umm_s_not_string_prod.yml index 04a5ed6d8..209e2d221 100644 --- a/services/harmony/test/resources/services_umm_s_not_string_prod.yml +++ b/services/harmony/test/resources/services_umm_s_not_string_prod.yml @@ -52,6 +52,7 @@ https://cmr.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} - name: umm_s_not_string @@ -77,4 +78,5 @@ https://cmr.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} diff --git a/services/harmony/test/resources/services_umm_s_not_string_uat.yml b/services/harmony/test/resources/services_umm_s_not_string_uat.yml index 5a2fb328a..56774cebe 100644 --- a/services/harmony/test/resources/services_umm_s_not_string_uat.yml +++ b/services/harmony/test/resources/services_umm_s_not_string_uat.yml @@ -46,6 +46,7 @@ https://cmr.uat.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} - name: podaac/l2-subsetter @@ -77,5 +78,6 @@ https://cmr.uat.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} diff --git a/services/harmony/test/resources/services_with_colls_prod.yml b/services/harmony/test/resources/services_with_colls_prod.yml index 3277f2db6..a403754a8 100644 --- a/services/harmony/test/resources/services_with_colls_prod.yml +++ 
b/services/harmony/test/resources/services_with_colls_prod.yml @@ -47,6 +47,7 @@ https://cmr.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} - name: podaac/l2-subsetter @@ -78,5 +79,6 @@ https://cmr.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} diff --git a/services/harmony/test/resources/services_with_colls_uat.yml b/services/harmony/test/resources/services_with_colls_uat.yml index 625cabc06..8724cf012 100644 --- a/services/harmony/test/resources/services_with_colls_uat.yml +++ b/services/harmony/test/resources/services_with_colls_uat.yml @@ -47,6 +47,7 @@ https://cmr.uat.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} - name: podaac/l2-subsetter @@ -78,5 +79,6 @@ https://cmr.uat.earthdata.nasa.gov: - application/x-netcdf4 steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} From 961592ddbae49833667b2bd3399d6ef6a29596f6 Mon Sep 17 00:00:00 2001 From: James Norton Date: Thu, 13 Feb 2025 17:36:21 -0500 Subject: [PATCH 2/4] HARMONY-2007: Clean up nested ifs --- services/harmony/app/models/services/index.ts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/services/harmony/app/models/services/index.ts b/services/harmony/app/models/services/index.ts index 98410e577..f15dce97e 100644 --- a/services/harmony/app/models/services/index.ts +++ b/services/harmony/app/models/services/index.ts @@ -110,10 +110,9 @@ function validateServiceConfigSteps(config: ServiceConfig): void { + `Configured to use ${maxBatchInputs}, but will be limited to ${env.maxGranuleLimit}`); } } - if (step.image.match(/harmonyservices\/query\-cmr:.*/)) { - if (!step.is_sequential) { - throw new TypeError(`Invalid is_sequential 
${step.is_sequential}. query-cmr steps must always have sequential = true.`); - } + if (step.image.match(/harmonyservices\/query\-cmr:.*/) && !step.is_sequential) { + + throw new TypeError(`Invalid is_sequential ${step.is_sequential}. query-cmr steps must always have sequential = true.`); } } } From e0a0f62d0654b0a78ac7f3670c5145e76d2c7093 Mon Sep 17 00:00:00 2001 From: James Norton Date: Thu, 13 Feb 2025 18:03:55 -0500 Subject: [PATCH 3/4] HARMONY-2007: Add missed file commits for test --- ...ith_false_is_sequential_query_cmr_prod.yml | 52 +++++++++++++++++++ ...with_false_is_sequential_query_cmr_uat.yml | 52 +++++++++++++++++++ ...ith_unset_is_sequential_query_cmr_prod.yml | 51 ++++++++++++++++++ ...with_unset_is_sequential_query_cmr_uat.yml | 51 ++++++++++++++++++ 4 files changed, 206 insertions(+) create mode 100644 services/harmony/test/resources/services_with_false_is_sequential_query_cmr_prod.yml create mode 100644 services/harmony/test/resources/services_with_false_is_sequential_query_cmr_uat.yml create mode 100644 services/harmony/test/resources/services_with_unset_is_sequential_query_cmr_prod.yml create mode 100644 services/harmony/test/resources/services_with_unset_is_sequential_query_cmr_uat.yml diff --git a/services/harmony/test/resources/services_with_false_is_sequential_query_cmr_prod.yml b/services/harmony/test/resources/services_with_false_is_sequential_query_cmr_prod.yml new file mode 100644 index 000000000..ca3baf97f --- /dev/null +++ b/services/harmony/test/resources/services_with_false_is_sequential_query_cmr_prod.yml @@ -0,0 +1,52 @@ +# Order for each CMR endpoint in this file will reflect precedence of the service when +# multiple services handle a collection + +# Default turbo configuration +x-turbo-config: &default-turbo-config + name: turbo + params: &default-turbo-params + env: &default-turbo-env + USE_LOCALSTACK: !Env ${USE_LOCALSTACK} + LOCALSTACK_HOST: !Env ${BACKEND_HOST} + AWS_DEFAULT_REGION: us-west-2 + STAGING_BUCKET: !Env 
${STAGING_BUCKET} + TEXT_LOGGER: !Env ${TEXT_LOGGER} + BACKEND_HOST: !Env ${BACKEND_HOST} + EDL_USERNAME: !Env ${EDL_USERNAME} + EDL_PASSWORD: !Env ${EDL_PASSWORD} + OAUTH_UID: !Env ${OAUTH_UID} + OAUTH_PASSWORD: !Env ${OAUTH_PASSWORD} + OAUTH_HOST: !Env ${OAUTH_HOST} + OAUTH_CLIENT_ID: !Env ${OAUTH_CLIENT_ID} + OAUTH_REDIRECT_URI: !Env ${OAUTH_REDIRECT_URI} + FALLBACK_AUTHN_ENABLED: !Env ${FALLBACK_AUTHN_ENABLED} + +https://cmr.earthdata.nasa.gov: + + - name: non-sequential-query-cmr + description: | + testing service configuration with query-cmr with no is_sequential: + data_operation_version: '0.20.0' + type: + <<: *default-turbo-config + params: + <<: *default-turbo-params + env: + <<: *default-turbo-env + STAGING_PATH: public/asf/opera-rtc-s1-browse + umm_s: S1271728813-ASF + maximum_sync_granules: 0 + capabilities: + concatenation: false + subsetting: + bbox: false + variable: false + temporal: false + output_formats: + - image/png + reprojection: true + steps: + - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: false + - image: !Env ${OPERA_RTC_S1_BROWSE_IMAGE} + - image: !Env ${HYBIG_IMAGE} \ No newline at end of file diff --git a/services/harmony/test/resources/services_with_false_is_sequential_query_cmr_uat.yml b/services/harmony/test/resources/services_with_false_is_sequential_query_cmr_uat.yml new file mode 100644 index 000000000..bcb1e7e0f --- /dev/null +++ b/services/harmony/test/resources/services_with_false_is_sequential_query_cmr_uat.yml @@ -0,0 +1,52 @@ +# Order for each CMR endpoint in this file will reflect precedence of the service when +# multiple services handle a collection + +# Default turbo configuration +x-turbo-config: &default-turbo-config + name: turbo + params: &default-turbo-params + env: &default-turbo-env + USE_LOCALSTACK: !Env ${USE_LOCALSTACK} + LOCALSTACK_HOST: !Env ${BACKEND_HOST} + AWS_DEFAULT_REGION: us-west-2 + STAGING_BUCKET: !Env ${STAGING_BUCKET} + TEXT_LOGGER: !Env ${TEXT_LOGGER} + BACKEND_HOST: !Env ${BACKEND_HOST} + 
EDL_USERNAME: !Env ${EDL_USERNAME} + EDL_PASSWORD: !Env ${EDL_PASSWORD} + OAUTH_UID: !Env ${OAUTH_UID} + OAUTH_PASSWORD: !Env ${OAUTH_PASSWORD} + OAUTH_HOST: !Env ${OAUTH_HOST} + OAUTH_CLIENT_ID: !Env ${OAUTH_CLIENT_ID} + OAUTH_REDIRECT_URI: !Env ${OAUTH_REDIRECT_URI} + FALLBACK_AUTHN_ENABLED: !Env ${FALLBACK_AUTHN_ENABLED} + +https://cmr.uat.earthdata.nasa.gov: + + - name: non-sequential-query-cmr + description: | + testing service configuration with query-cmr with no is_sequential: + data_operation_version: '0.20.0' + type: + <<: *default-turbo-config + params: + <<: *default-turbo-params + env: + <<: *default-turbo-env + STAGING_PATH: public/asf/opera-rtc-s1-browse + umm_s: S1271728813-ASF + maximum_sync_granules: 0 + capabilities: + concatenation: false + subsetting: + bbox: false + variable: false + temporal: false + output_formats: + - image/png + reprojection: true + steps: + - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: false + - image: !Env ${OPERA_RTC_S1_BROWSE_IMAGE} + - image: !Env ${HYBIG_IMAGE} \ No newline at end of file diff --git a/services/harmony/test/resources/services_with_unset_is_sequential_query_cmr_prod.yml b/services/harmony/test/resources/services_with_unset_is_sequential_query_cmr_prod.yml new file mode 100644 index 000000000..1efbc4203 --- /dev/null +++ b/services/harmony/test/resources/services_with_unset_is_sequential_query_cmr_prod.yml @@ -0,0 +1,51 @@ +# Order for each CMR endpoint in this file will reflect precedence of the service when +# multiple services handle a collection + +# Default turbo configuration +x-turbo-config: &default-turbo-config + name: turbo + params: &default-turbo-params + env: &default-turbo-env + USE_LOCALSTACK: !Env ${USE_LOCALSTACK} + LOCALSTACK_HOST: !Env ${BACKEND_HOST} + AWS_DEFAULT_REGION: us-west-2 + STAGING_BUCKET: !Env ${STAGING_BUCKET} + TEXT_LOGGER: !Env ${TEXT_LOGGER} + BACKEND_HOST: !Env ${BACKEND_HOST} + EDL_USERNAME: !Env ${EDL_USERNAME} + EDL_PASSWORD: !Env ${EDL_PASSWORD} + OAUTH_UID: 
!Env ${OAUTH_UID} + OAUTH_PASSWORD: !Env ${OAUTH_PASSWORD} + OAUTH_HOST: !Env ${OAUTH_HOST} + OAUTH_CLIENT_ID: !Env ${OAUTH_CLIENT_ID} + OAUTH_REDIRECT_URI: !Env ${OAUTH_REDIRECT_URI} + FALLBACK_AUTHN_ENABLED: !Env ${FALLBACK_AUTHN_ENABLED} + +https://cmr.earthdata.nasa.gov: + + - name: non-sequential-query-cmr + description: | + testing service configuration with query-cmr with no is_sequential: + data_operation_version: '0.20.0' + type: + <<: *default-turbo-config + params: + <<: *default-turbo-params + env: + <<: *default-turbo-env + STAGING_PATH: public/asf/opera-rtc-s1-browse + umm_s: S1271728813-ASF + maximum_sync_granules: 0 + capabilities: + concatenation: false + subsetting: + bbox: false + variable: false + temporal: false + output_formats: + - image/png + reprojection: true + steps: + - image: !Env ${QUERY_CMR_IMAGE} + - image: !Env ${OPERA_RTC_S1_BROWSE_IMAGE} + - image: !Env ${HYBIG_IMAGE} \ No newline at end of file diff --git a/services/harmony/test/resources/services_with_unset_is_sequential_query_cmr_uat.yml b/services/harmony/test/resources/services_with_unset_is_sequential_query_cmr_uat.yml new file mode 100644 index 000000000..5187283fe --- /dev/null +++ b/services/harmony/test/resources/services_with_unset_is_sequential_query_cmr_uat.yml @@ -0,0 +1,51 @@ +# Order for each CMR endpoint in this file will reflect precedence of the service when +# multiple services handle a collection + +# Default turbo configuration +x-turbo-config: &default-turbo-config + name: turbo + params: &default-turbo-params + env: &default-turbo-env + USE_LOCALSTACK: !Env ${USE_LOCALSTACK} + LOCALSTACK_HOST: !Env ${BACKEND_HOST} + AWS_DEFAULT_REGION: us-west-2 + STAGING_BUCKET: !Env ${STAGING_BUCKET} + TEXT_LOGGER: !Env ${TEXT_LOGGER} + BACKEND_HOST: !Env ${BACKEND_HOST} + EDL_USERNAME: !Env ${EDL_USERNAME} + EDL_PASSWORD: !Env ${EDL_PASSWORD} + OAUTH_UID: !Env ${OAUTH_UID} + OAUTH_PASSWORD: !Env ${OAUTH_PASSWORD} + OAUTH_HOST: !Env ${OAUTH_HOST} + OAUTH_CLIENT_ID: !Env 
${OAUTH_CLIENT_ID} + OAUTH_REDIRECT_URI: !Env ${OAUTH_REDIRECT_URI} + FALLBACK_AUTHN_ENABLED: !Env ${FALLBACK_AUTHN_ENABLED} + +https://cmr.uat.earthdata.nasa.gov: + + - name: non-sequential-query-cmr + description: | + testing service configuration with query-cmr with no is_sequential: + data_operation_version: '0.20.0' + type: + <<: *default-turbo-config + params: + <<: *default-turbo-params + env: + <<: *default-turbo-env + STAGING_PATH: public/asf/opera-rtc-s1-browse + umm_s: S1271728813-ASF + maximum_sync_granules: 0 + capabilities: + concatenation: false + subsetting: + bbox: false + variable: false + temporal: false + output_formats: + - image/png + reprojection: true + steps: + - image: !Env ${QUERY_CMR_IMAGE} + - image: !Env ${OPERA_RTC_S1_BROWSE_IMAGE} + - image: !Env ${HYBIG_IMAGE} \ No newline at end of file From 1ab24feea538fd1e877e2fa85971bf8d487e2d16 Mon Sep 17 00:00:00 2001 From: James Norton Date: Fri, 14 Feb 2025 10:07:58 -0500 Subject: [PATCH 4/4] HARMONY-2007: Add docs for is_sequential --- docs/guides/adapting-new-services.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/guides/adapting-new-services.md b/docs/guides/adapting-new-services.md index ca832259c..96585ade3 100644 --- a/docs/guides/adapting-new-services.md +++ b/docs/guides/adapting-new-services.md @@ -38,6 +38,7 @@ then invokes a single service image. If setting up a more complex service chain - [5. Error handling](#5-error-handling) - [6. Defining environment variables in env-defaults](#6-defining-environment-variables-in-env-defaults) - [7. Registering services in services.yml](#7-registering-services-in-servicesyml) + - [Sequential Steps](#sequential-steps) - [Aggregation Steps](#aggregation-steps) - [8. Docker Container Images](#8-docker-container-images) - [9. 
Recommendations for service implementations](#9-recommendations-for-service-implementations) @@ -135,6 +136,7 @@ The structure of an entry in the [services.yml](../../config/services.yml) file validate_variables: true # Whether to validate the requested variables exist in the CMR. Defaults to true. steps: - image: !Env ${QUERY_CMR_IMAGE} # The image to use for the first step in the chain + is_sequential: true # Required for query-cmr - image: !Env ${HARMONY_EXAMPLE_IMAGE} # The image to use for the second step in the chain ``` @@ -151,6 +153,7 @@ The following `steps` entry is for a chain of services including the PODAAC L2 S ```yaml steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${PODAAC_L2_SUBSETTER_IMAGE} operations: ['spatialSubset', 'variableSubset'] conditional: @@ -168,6 +171,7 @@ There is also a `conditional` option on `umm-c` `native_format` that compares wi ```yaml steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${NET_2_COG_IMAGE} conditional: umm_c: @@ -177,6 +181,13 @@ steps: Here we have the query-cmr service (this service is the first in every current workflow). This is followed by the optional NetCDF to COG service, which will only be invoked when the collection's UMM-C native format is one of the values that are defined (case insensitive) in the steps configuration (i.e. `[netcdf-4]`). Finally, we have the HyBIG service that converts the GeoTIFF inputs from the previous step to Global Imagery Browse Services (GIBS) compatible PNG or JPEG outputs. See [10. Service chaining](#10-service-chaining) for more info. +### Sequential Steps +Most steps will produce all of the pieces of work (known as work-items) for a service immediately when the step begins. This allows all of the work-items to be worked in parallel. It is possible, however, for new work-items for the same service to be produced as the step is being worked. In this case, the work-items must be worked sequentially. 
Steps that must be worked sequentially should include `is_sequential: true` in their definition. + +An example of this is the query-cmr service. Each invocation of the query-cmr service can only return up to 2000 granules (due to the CMR page size limit), so, if the job has more granules than that, query-cmr is invoked multiple times. Because the number of granules reported by the CMR may change at any time, we cannot know ahead of time exactly how many invocations we need. So, if the job has more than 2000 granules, query-cmr is invoked sequentially until all granules are returned. + +For most services `is_sequential: true` is not necessary. + ### Aggregation Steps Services that provide aggregation, e.g., concatenation for CONCISE, require that all inputs are available when they are run. Harmony infers this from the `operations` field in the associated step. @@ -195,6 +206,7 @@ The following `steps` entry is an example one might use for an aggregating servi ```yaml steps: - image: !Env ${QUERY_CMR_IMAGE} + is_sequential: true - image: !Env ${EXAMPLE_AGGREGATING_SERVICE_IMAGE} is_batched: true max_batch_inputs: 100