Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HARMONY-1499: Added ability to use conditional step in services.yml based on native file format info in umm-c. #438

Merged
merged 6 commits into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions app/middleware/cmr-umm-collection-reader.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { NextFunction } from 'express';
import { getUmmCollectionsByIds } from '../util/cmr';
import HarmonyRequest from '../models/harmony-request';

/**
* Express.js middleware that reads the UMM JSON format of the collections and load them into operation
*
* @param req - The client request
* @param res - The client response
* @param next - The next function in the middleware chain
*/
async function cmrUmmCollectionReader(req: HarmonyRequest, res, next: NextFunction): Promise<void> {
try {
const hasUmmConditional = req.context.serviceConfig?.steps?.filter((s) => s.conditional?.umm_c);
if (hasUmmConditional && hasUmmConditional.length > 0) {
req.operation.ummCollections = await getUmmCollectionsByIds(req.collectionIds, req.accessToken);
}
next();
} catch (error) {
req.collectionIds = [];
req.collections = [];
next(error);
}
}

export = cmrUmmCollectionReader;
3 changes: 2 additions & 1 deletion app/models/data-operation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import Ajv from 'ajv';
import addFormats from 'ajv-formats';
import _ from 'lodash';
import logger from '../util/log';
import { CmrUmmVariable } from '../util/cmr';
import { CmrUmmCollection, CmrUmmVariable } from '../util/cmr';
import { Encrypter, Decrypter } from '../util/crypto';
import { cmrVarToHarmonyVar, HarmonyVariable } from '../util/variables';

Expand Down Expand Up @@ -293,6 +293,7 @@ export default class DataOperation {

destinationUrl: string;

ummCollections: CmrUmmCollection[];

/**
* Creates an instance of DataOperation.
Expand Down
13 changes: 13 additions & 0 deletions app/models/services/base-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ export interface ServiceStep {
conditional?: {
exists?: string[];
format?: string[];
umm_c?: {
native_format?: string[];
}
};
}

Expand Down Expand Up @@ -159,6 +162,16 @@ function stepRequired(step: ServiceStep, operation: DataOperation): boolean {
required = true;
}
}
if (required && step.conditional?.umm_c) {
if (step.conditional.umm_c.native_format) {
required = false;
const fileArchiveInfo = operation.ummCollections[0].umm.ArchiveAndDistributionInformation?.FileArchiveInformation;
const nativeFormat = fileArchiveInfo?.filter((a) => a.FormatType = 'Native')[0]?.Format;
if (nativeFormat && step.conditional.umm_c.native_format.includes(nativeFormat.toLowerCase())) {
required = true;
}
}
}
return required;
}

Expand Down
2 changes: 2 additions & 0 deletions app/routers/router.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import serviceInvoker from '../backends/service-invoker';
import HarmonyRequest, { addRequestContextToOperation } from '../models/harmony-request';

import cmrCollectionReader = require('../middleware/cmr-collection-reader');
import cmrUmmCollectionReader = require('../middleware/cmr-umm-collection-reader');
import envVars = require('../util/env');
import { postServiceConcatenationHandler, preServiceConcatenationHandler } from '../middleware/concatenation';
import getRequestMetrics from '../frontends/request-metrics';
Expand Down Expand Up @@ -194,6 +195,7 @@ export default function router({ skipEarthdataLogin = 'false' }: RouterConfig):
result.use(logged(preServiceConcatenationHandler));
result.use(logged(chooseService));
result.use(logged(postServiceConcatenationHandler));
result.use(logged(cmrUmmCollectionReader));
result.use(logged(cmrGranuleLocator));
result.use(logged(addRequestContextToOperation));
result.use(logged(redirectWithoutTrailingSlash));
Expand Down
55 changes: 55 additions & 0 deletions app/util/cmr.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,28 @@ export interface CmrPermissionsResponse extends CmrResponse {
data: CmrPermissionsMap;
}

export interface CmrUmmCollection {
meta: {
// eslint-disable-next-line @typescript-eslint/naming-convention
'concept-id': string;
};
umm: {
ArchiveAndDistributionInformation?: {
FileArchiveInformation: [{
FormatType: string;
Format: string;
}]
}
};
}

export interface CmrUmmCollectionsResponse extends CmrResponse {
data: {
items: CmrUmmCollection[];
hits: number;
};
}

/**
* Create a token header for the given access token string
*
Expand Down Expand Up @@ -595,6 +617,20 @@ async function queryCollections(
return collectionsResponse.data.feed.entry;
}

/**
* Performs a CMR collections.umm_json search with the given query string
*
* @param query - The key/value pairs to search
* @param token - Access token for user request
* @returns The umm collection search results
*/
async function queryUmmCollections(
query: CmrQuery, token: string,
): Promise<Array<CmrUmmCollection>> {
const ummResponse = await _cmrGet('/search/collections.umm_json_v1_17_3', query, token) as CmrUmmCollectionsResponse;
return ummResponse.data.items;
}

/**
* Performs a CMR grids.umm_json search with the given query string
*
Expand Down Expand Up @@ -656,6 +692,25 @@ export function getCollectionsByIds(
return queryCollections(query, token);
}

/**
* Queries and returns the CMR UMM JSON collections corresponding to the given CMR Collection IDs
*
* @param ids - The collection IDs to find
* @param token - Access token for user request
* @param includeTags - Include tags with tag_key matching this value
* @returns The umm collections with the given ids
*/
export function getUmmCollectionsByIds(
ids: Array<string>,
token: string,
): Promise<Array<CmrUmmCollection>> {
const query = {
concept_id: ids,
page_size: cmrMaxPageSize,
};
return queryUmmCollections(query, token);
}

/**
* Queries and returns the CMR JSON collections corresponding to the given collection short names
*
Expand Down
44 changes: 29 additions & 15 deletions docs/guides/adapting-new-services.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,19 +132,33 @@ The following `steps` entry is for a chain of services including the PODAAC L2 S

```yaml
steps:
- image: !Env ${QUERY_CMR_IMAGE}
- image: !Env ${PODAAC_L2_SUBSETTER_IMAGE}
operations: ['spatialSubset', 'variableSubset']
conditional:
exists: ['spatialSubset', 'variableSubset']
- image: !Env ${HARMONY_NETCDF_TO_ZARR_IMAGE}
operations: ['reformat']
conditional:
format: ['application/x-zarr']
- image: !Env ${QUERY_CMR_IMAGE}
- image: !Env ${PODAAC_L2_SUBSETTER_IMAGE}
operations: ['spatialSubset', 'variableSubset']
conditional:
exists: ['spatialSubset', 'variableSubset']
- image: !Env ${HARMONY_NETCDF_TO_ZARR_IMAGE}
operations: ['reformat']
conditional:
format: ['application/x-zarr']
```

First we have the query-cmr service (this service is the first in every current workflow). This is followed by the PODAAC L2 Subsetter service, which provides the 'spatialSubset' and 'variableSubset' operations and is only invoked if the user is requesting one or both of those. Finally, we have the Harmony netcdf-to-zarr service which provides the 'reformat' operation and is only invoked if the request asks for 'zarr' output.

There is also a `conditional` option on `umm-c` `native_format` that compares with the value of the collection UMM-C field: `ArchiveAndDistributionInformationType.FileArchiveInformation.Format` when the sibling FormatType = 'Native'. Here is an example of its usage:

```yaml
steps:
- image: !Env ${QUERY_CMR_IMAGE}
- image: !Env ${NET_2_COG_IMAGE}
conditional:
umm_c:
native_format: ['netcdf-4']
- image: !Env ${HYBIG_IMAGE}
```

Here we have the query-cmr service (this service is the first in every current workflow). This is followed by the optional NetCDF to COG service, which will only be invoked when the collection's UMM-C native format is one of the values that are defined (case insensitive) in the steps configuration (i.e. `[netcdf-4]`). Finally, we have the HyBIG service that converts the GeoTIFF inputs from the previous step to Global Imagery Browse Services (GIBS) compatible PNG or JPEG outputs.

### Aggregation Steps
Services that provide aggregation, e.g., concatenation for CONCISE, require that all inputs are
available when they are run. Harmony infers this from the `operations` field in the associated step.
Expand All @@ -162,12 +176,12 @@ The following `steps` entry is an example one might use for an aggregating servi

```yaml
steps:
- image: !Env ${QUERY_CMR_IMAGE}
- image: !Env ${EXAMPLE_AGGREGATING_SERVICE_IMAGE}
is_batched: true
max_batch_inputs: 100
max_batch_size_in_bytes: 2000000000
operations: ['concatenate']
- image: !Env ${QUERY_CMR_IMAGE}
- image: !Env ${EXAMPLE_AGGREGATING_SERVICE_IMAGE}
is_batched: true
max_batch_inputs: 100
max_batch_size_in_bytes: 2000000000
operations: ['concatenate']
```

There are default limits set by the environment variables `MAX_BATCH_INPUTS` and
Expand Down
56 changes: 56 additions & 0 deletions test/models/services.ts
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,13 @@ describe('createWorkflowSteps', function () {
const versionId = '1';
const operation = buildOperation('foo');
operation.addSource(collectionId, shortName, versionId);
// the existence of conditional umm_c in config guarantees that ummCollections is set
operation.ummCollections = [{
'meta': {
'concept-id': 'C1234-TEST',
},
'umm': {},
}];
const config = {
name: 'shapefile-tiff-netcdf-service',
data_operation_version: CURRENT_SCHEMA_VERSION,
Expand All @@ -825,6 +832,14 @@ describe('createWorkflowSteps', function () {
},
steps: [{
image: 'query cmr',
}, {
image: 'format transformer',
operations: ['formatTransform'],
conditional: {
umm_c: {
native_format: ['netcdf-4'],
},
},
}, {
image: 'temporal subsetter',
operations: ['temporalSubset'],
Expand Down Expand Up @@ -1033,4 +1048,45 @@ describe('createWorkflowSteps', function () {
expect(stagingLocation).to.include('dummy/p1');
});
});

describe('when a collection has matching umm-c conditional native_format', function () {
const ummOperation = _.cloneDeep(operation);
ummOperation.geojson = 'interesting shape';
ummOperation.ummCollections = [{
'meta': {
'concept-id': 'C1234-TEST',
},
'umm': {
'ArchiveAndDistributionInformation': {
'FileArchiveInformation': [ {
'Format': 'netCDF-4',
'FormatType': 'Native',
} ],
},
},
}];
const service = new StubService(config, {}, ummOperation);
const steps = service.createWorkflowSteps();

it('is not synchronous', function () {
expect(service.isSynchronous).to.equal(false);
expect(service.operation.isSynchronous).to.equal(false);
});

it('creates three workflow steps', function () {
expect(steps.length).to.equal(3);
});

it('creates a first workflow step for query cmr', function () {
expect(steps[0].serviceID).to.equal('query cmr');
});

it('creates a second step for conditional umm_c native_format', function () {
expect(steps[1].serviceID).to.equal('format transformer');
});

it('creates a third and final workflow step for the shapefile subsetter', function () {
expect(steps[2].serviceID).to.equal('shapefile subsetter');
});
});
});
Loading