Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAS-1901: Adding publishVariableDrafts field to publish generated variable drafts. #79

Merged
merged 11 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,71 @@ variables:
}
}

#### Publish Generated Collection Variable Drafts

For all supported arguments and columns, see [the schema](src/types/variable.graphql).

CMR-GraphQL queries an earthdata-varinfo lambda in order to generate and publish collection variable drafts. The resulting variables can be returned as part of the Variable type response.

`publishVariableDrafts` will return the generated collection variables, using the [earthdata-varinfo](https://github.com/nasa/earthdata-varinfo) project.

##### Example Mutation

mutation PublishGeneratedVariables($conceptId: String!) {
publishGeneratedVariables(conceptId: $conceptId) {
count
items {
conceptId
dataType
definition
dimensions
longName
name
standardName
units
metadataSpecification
}
}
}

variables:
{
"conceptId": "C1000000001-EXAMPLE"
}

##### Example Response

{
"data": {
"publishGeneratedVariables": {
"count": 1,
"items": [
{
"conceptId": "V1000000001-EXAMPLE",
"dataType": "int32",
"definition": "Grid/time",
"dimensions": [
{
"Name": "Grid/time",
"Size": 1,
"Type": "TIME_DIMENSION"
}
],
"longName": "Grid/time",
"name": "Grid/time",
"standardName": "time",
"units": "seconds since 1970-01-01 00:00:00 UTC",
"metadataSpecification": {
"URL": "https://cdn.earthdata.nasa.gov/umm/variable/v1.8.2",
"Name": "UMM-Var",
"Version": "1.8.2"
}
}
]
}
}
}

#### Drafts

For all supported arguments and columns, see [the schema](src/types/draft.graphql).
Expand Down Expand Up @@ -961,6 +1026,7 @@ variables:
"nativeId": "tool-1",
"ummVersion": "1.2.0"
}

#### Local graph database:

Normally, running GraphQL with `serverless offline` will utilize the `cmr.earthdata.nasa.gov/graphdb` endpoint to query against related collections and duplicate collections in the graph database. To send queries to a locally running graph database, we can use a docker gremlin-server that exposes an HTTP endpoint. This is launched by running
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
earthdata-varinfo==1.0.1
earthdata-varinfo==2.1.0
8 changes: 2 additions & 6 deletions src/datasources/collectionVariableDrafts.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@ import { parseError } from '../utils/parseError'
export default async (params, context) => {
const { headers } = context

const { authorization: authorizationHeader = '' } = downcaseKeys(headers)

// Split the token by a space (Bearer xxx.xxx.xxx) and take the
// last element to get just the token without the 'Bearer' identifier
const token = authorizationHeader.split(' ').pop()
const { authorization: authHeader } = downcaseKeys(headers)

const lambdaClient = new LambdaClient(getLambdaConfig())

Expand All @@ -30,7 +26,7 @@ export default async (params, context) => {
LogType: LogType.Tail,
Payload: JSON.stringify({
...params,
token
authHeader
})
})

Expand Down
48 changes: 24 additions & 24 deletions src/earthdataVarinfo/handler.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""Module providing access to environment variables"""
import os
from tempfile import TemporaryDirectory

from varinfo import VarInfoFromNetCDF4
from varinfo.cmr_search import get_granules, get_granule_link, download_granule
from varinfo.umm_var import get_all_umm_var
# Needed by serverless python requirements because we are setting zip to true
try:
import unzip_requirements
except ImportError:
pass

from varinfo.generate_umm_var import generate_collection_umm_var

def main(event, context):
"""Handler that calls the earthdata-varinfo library"""
Expand All @@ -16,35 +19,29 @@ def main(event, context):
collection_concept_id = event.get('conceptId')

# Get token
token = event.get('token')
auth_header = event.get('authHeader')

if event.get('publish') is None:
publish = False
else:
publish = event.get('publish')

# These two arguments are required for varinfo, return an error if they are not provided
if collection_concept_id is None or token is None:
if collection_concept_id is None or auth_header is None:
return {
'isBase64Encoded': False,
'statusCode': 500,
'body': {
'error': 'Collection Concept ID and Token must be provided.'
'error': 'Collection Concept ID and Authentication Header must be provided.'
}
}

try:
# Retrieve a list of 10 granules for the collection
granules = get_granules(collection_concept_id, cmr_env=cmr_url, token=token)

# Get the URL for the first granule (NetCDF-4 file):
granule_link = get_granule_link(granules)

# Make a temporary directory:
with TemporaryDirectory() as temp_dir:
# Download file to lambda runtime environment
local_granule = download_granule(granule_link, token, out_directory=temp_dir)

# Parse the granule with VarInfo:
var_info = VarInfoFromNetCDF4(local_granule)

# Generate all the UMM-Var records:
all_variables = get_all_umm_var(var_info)
# Generate all the UMM-Var records:
all_variables = generate_collection_umm_var(collection_concept_id,
auth_header=auth_header,
cmr_env=cmr_url,
publish=publish)
except Exception as error:
return {
'isBase64Encoded': False,
Expand All @@ -54,9 +51,12 @@ def main(event, context):
}
}

if publish == True:
all_variables = [{'conceptId': item} for item in all_variables]

# Return a successful response
return {
'isBase64Encoded': False,
'statusCode': 200,
'body': list(all_variables.values())
'body': list(all_variables)
}
9 changes: 9 additions & 0 deletions src/earthdataVarinfo/test/expected_publish_response.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"isBase64Encoded": false,
"statusCode": 200,
"body": [
{"conceptId": "V0001-TEST"},
{"conceptId": "V0002-TEST"}
]
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add newline

Binary file removed src/earthdataVarinfo/test/sample.HDF5
Binary file not shown.
56 changes: 42 additions & 14 deletions src/earthdataVarinfo/test/test_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,65 @@
class HandlerTest(TestCase):
''' A class for testing main function of handler.py
'''
def test_missing_token(self):
''' Test when main is called with a missing token in the event parameter
def test_missing_auth_header(self):
''' Test when main is called with a missing auth_header in the event parameter
'''
response = main({'conceptId': 'C1234-TEST'}, "")
expected_response = {'body': {'error': 'Collection Concept ID and Token must be provided.'},
expected_response = {'body': {'error': 'Collection Concept ID and Authentication Header must be provided.'},
'isBase64Encoded': False,
'statusCode': 500}
self.assertEqual(response, expected_response)

def test_missing_concept_id(self):
''' Test when main is called with a missing conceptId in the event parameter
'''
response = main({'token': 'faketoken'}, "")
expected_response = {'body': {'error': 'Collection Concept ID and Token must be provided.'},
response = main({'authHeader': 'fake header'}, "")
expected_response = {'body': {'error': 'Collection Concept ID and Authentication Header must be provided.'},
'isBase64Encoded': False,
'statusCode': 500}
self.assertEqual(response, expected_response)

@patch('handler.get_granules')
@patch('handler.get_granule_link')
@patch('handler.download_granule')
def test_good_case(self, mock_download_granule, mock_get_granule_link, mock_get_granules):
@patch('handler.generate_collection_umm_var')
def test_generate_case(self, mock_generate_collection_umm_var):
''' Test when main is called successfully
'''
mock_download_granule.return_value = 'test/sample.HDF5'
mock_get_granule_link.return_value = 'Mock link'
mock_get_granules.return_value = 'Mock granules'
# Specify the path to your JSON file
file_path = 'test/variables.json'

# Open the JSON file for reading
with open(file_path, 'r') as json_file:
# Use json.load() to parse the JSON data into a Python variable
mock_response = json.load(json_file)

# Set the mock's return value
mock_generate_collection_umm_var.return_value = mock_response

# Call the main function
response = main({'authHeader': 'fake header', 'conceptId': 'C1234-TEST'}, "")

# Specify the path to your JSON file
file_path = 'test/expected_generate_response.json'

# Open the JSON file for reading
with open(file_path, 'r') as json_file:
# Use json.load() to parse the JSON data into a Python variable
expected_response = json.load(json_file)

self.maxDiff = None
self.assertEqual(response, expected_response)

@patch('handler.generate_collection_umm_var')
def test_publish_case(self, mock_generate_collection_umm_var):
''' Test when main is called successfully with publish set to True
'''
# Set the mock's return value
mock_generate_collection_umm_var.return_value = ['V0001-TEST', 'V0002-TEST']

# Call the main function
response = main({'authHeader': 'fake header', 'conceptId': 'C1234-TEST', 'publish': True}, "")

response = main({'token': 'faketoken', 'conceptId': 'C1234-TEST'}, "")
# Specify the path to your JSON file
file_path = 'test/expected_response.json'
file_path = 'test/expected_publish_response.json'

# Open the JSON file for reading
with open(file_path, 'r') as json_file:
Expand Down
Loading