Skip to content

Commit

Permalink
Feature/issue 235 - Track ingest table can be populated with granules…
Browse files Browse the repository at this point in the history
… that aren't loaded into Hydrocron (#245)

* Raise an error if collection shortname does not match Hydrocron table names

* Raise an error unsupported lake data in load granule operations

* Remove trailing whitespace

* Fix code formatting

* Update CHANGELOG with issue

* Feature/issue 248 - Track ingest operations need to query UAT for granule files (#249)

* Query to return granule files should query UAT when running in SIT or UAT environments

* SIT execution should return UAT files for load granule operations

* Set venue environment variable before running test of query_cmr

* Add issue to CHANGELOG
  • Loading branch information
nikki-t authored Oct 17, 2024
1 parent a57a870 commit 9a2c2a3
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 35 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Deprecated
### Removed
### Fixed
- Issue 235 - Track ingest table can be populated with granules that aren't loaded into Hydrocron
- Issue 248 - Track ingest operations need to query UAT for granule files if track ingest is running in SIT or UAT
### Security

## [1.4.1]
Expand Down
3 changes: 3 additions & 0 deletions hydrocron/db/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ def granule_handler(event, _):
if ("LakeSP_Prior" in granule_path) & (table_name != constants.SWOT_PRIOR_LAKE_TABLE_NAME):
raise TableMisMatch(f"Error: Cannot load Prior Lake data into table: '{table_name}'")

if ("LakeSP_Obs" in granule_path) | ("LakeSP_Unassigned" in granule_path):
raise TableMisMatch(f"Error: Cannot load Observed or Unassigned Lake data into table: '{table_name}'")

logging.info("Value of load_benchmarking_data is: %s", load_benchmarking_data)

obscure_data = "true" in os.getenv("OBSCURE_DATA").lower()
Expand Down
33 changes: 25 additions & 8 deletions hydrocron/db/track_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

# Application Imports
from hydrocron.api.data_access.db import DynamoDataRepository
from hydrocron.db.load_data import load_data
from hydrocron.utils import connection
from hydrocron.db.load_data import load_data, TableMisMatch
from hydrocron.utils import connection, constants


logging.getLogger().setLevel(logging.INFO)
Expand Down Expand Up @@ -109,31 +109,27 @@ def query_cmr(self, temporal):
bearer_token = self._get_bearer_token()
granules = query.short_name(self.SHORTNAME[self.collection_shortname]) \
.temporal(self.query_start, self.query_end) \
.format("umm_json") \
.mode(CMR_UAT) \
.bearer_token(bearer_token) \
.get(query.hits())
granules = self._filter_granules(granules)
else:
granules = query.short_name(self.collection_shortname) \
.temporal(self.query_start, self.query_end) \
.format("umm_json") \
.get(query.hits())
else:
logging.info("Querying CMR revision_date range: %s to %s.", self.query_start, self.query_end)
if self.ENV in ("sit", "uat"):
bearer_token = self._get_bearer_token()
granules = query.short_name(self.SHORTNAME[self.collection_shortname]) \
.revision_date(self.query_start, self.query_end) \
.format("umm_json") \
.mode(CMR_UAT) \
.bearer_token(bearer_token) \
.get(query.hits())
granules = self._filter_granules(granules)
else:
granules = query.short_name(self.collection_shortname) \
.revision_date(self.query_start, self.query_end) \
.format("umm_json") \
.get(query.hits())

cmr_granules = {}
Expand Down Expand Up @@ -295,7 +291,15 @@ def _query_granule_files(self, granule_ur):
"""

query = GranuleQuery()
granules = query.short_name(self.collection_shortname).readable_granule_name(granule_ur).format("umm_json").get_all()
query = query.short_name(self.collection_shortname).readable_granule_name(granule_ur).format("umm_json")

if self.ENV in ("sit", "uat"):
bearer_token = self._get_bearer_token()
query = query.bearer_token(bearer_token) \
.mode(CMR_UAT) \
.short_name(self.SHORTNAME[self.collection_shortname])

granules = query.get_all()
cnm_files = []
for granule in granules:
granule_json = json.loads(granule)
Expand All @@ -313,7 +317,7 @@ def _query_granule_files(self, granule_ur):
for granule_url in granule_item["umm"]["RelatedUrls"]:
if granule_url["Type"] == "GET DATA VIA DIRECT ACCESS":
if self.ENV in ("sit", "uat"):
cnm_file["uri"] = granule_url["URL"].replace("ops", "uat")
cnm_file["uri"] = granule_url["URL"].replace("sit", "uat")
else:
cnm_file["uri"] = granule_url["URL"]
cnm_files.append(cnm_file)
Expand Down Expand Up @@ -359,6 +363,19 @@ def track_ingest_handler(event, context):
hydrocron_table = event["hydrocron_table"]
hydrocron_track_table = event["hydrocron_track_table"]
temporal = "temporal" in event.keys()

if ("reach" in collection_shortname) and ((hydrocron_table != constants.SWOT_REACH_TABLE_NAME)
or (hydrocron_track_table != constants.SWOT_REACH_TRACK_INGEST_TABLE_NAME)):
raise TableMisMatch(f"Error: Cannot query reach data for tables: '{hydrocron_table}' and '{hydrocron_track_table}'")

if ("node" in collection_shortname) and ((hydrocron_table != constants.SWOT_NODE_TABLE_NAME)
or (hydrocron_track_table != constants.SWOT_NODE_TRACK_INGEST_TABLE_NAME)):
raise TableMisMatch(f"Error: Cannot query node data for tables: '{hydrocron_table}' and '{hydrocron_track_table}'")

if ("prior" in collection_shortname) and ((hydrocron_table != constants.SWOT_PRIOR_LAKE_TABLE_NAME)
or (hydrocron_track_table != constants.SWOT_PRIOR_LAKE_TRACK_INGEST_TABLE_NAME)):
raise TableMisMatch(f"Error: Cannot query prior lake data for tables: '{hydrocron_table}' and '{hydrocron_track_table}'")

if temporal:
query_start = datetime.datetime.strptime(event["query_start"], "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc)
query_end = datetime.datetime.strptime(event["query_end"], "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc)
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def mock_sns():
os.environ["AWS_DEFAULT_REGION"] = "us-west-2"

sns = boto3.client("sns")
sns.create_topic(Name="svc-hydrocron-sit-cnm-response")
sns.create_topic(Name="svc-hydrocron-test-cnm-response")

yield sns

Expand Down
2 changes: 1 addition & 1 deletion tests/test_data/track_ingest_cnm_message.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"checksumType": "md5",
"checksum": "6ce27e868bd90055252de186f554759f",
"size": 745878,
"uri": "s3://podaac-swot-uat-cumulus-protected/SWOT_L2_HR_RiverSP_2.0/SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip"
"uri": "s3://podaac-swot-ops-cumulus-protected/SWOT_L2_HR_RiverSP_2.0/SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip"
}
]
}
Expand Down
21 changes: 21 additions & 0 deletions tests/test_hydrocron_database.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

"""
==============
test_create_table.py
Expand Down Expand Up @@ -72,3 +74,22 @@ def test_delete_item(hydrocron_dynamo_table):
partition_key=constants.TEST_REACH_ID_VALUE,
sort_key=constants.TEST_REACH_TIME_VALUE)
assert hydrocron_dynamo_table.table.item_count == 686


def test_track_table_mismatch():
"""
Test track ingest table name mismatch with granule UR.
"""
import hydrocron.db.load_data

event = {
"body": {
"granule_path": "s3://podaac-swot-sit-cumulus-protected/SWOT_L2_HR_LakeSP_2.0/SWOT_L2_HR_LakeSP_Obs_020_150_EU_20240825T234434_20240825T235245_PIC0_01.zip",
"table_name": "hydrocron-swot-prior-lake-table",
"load_benchmarking_data": "False",
"track_table": "hydrocron-swot-prior-lake-track-ingest-table"
}
}
with pytest.raises(hydrocron.db.load_data.TableMisMatch) as e:
hydrocron.db.load_data.granule_handler(event, None)
assert str(e.value) == "Error: Cannot load Observed or Unassigned Lake data into table: 'hydrocron-swot-prior-lake-table'"
32 changes: 28 additions & 4 deletions tests/test_track_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from moto.core import DEFAULT_ACCOUNT_ID
from moto.sns import sns_backends
import pytest
import vcr

from hydrocron.utils import constants
Expand All @@ -19,14 +20,14 @@ def test_query_cmr(mock_ssm):
Uses vcrpy to record CMR API response.
"""
os.environ['HYDROCRON_ENV'] = 'OPS'
from hydrocron.db.track_ingest import Track

collection_shortname = "SWOT_L2_HR_RiverSP_reach_2.0"
collection_start_date = datetime.datetime.strptime("20240630", "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
track = Track(collection_shortname, collection_start_date)
track.query_start = datetime.datetime(2024, 6, 30, 0, 0, 0, tzinfo=datetime.timezone.utc)
track.query_end = datetime.datetime(2024, 6, 30, 12, 0, 0, tzinfo=datetime.timezone.utc)
track.ENV = "OPS"

vcr_cassette = pathlib.Path(os.path.dirname(os.path.realpath(__file__))) \
.joinpath('vcr_cassettes').joinpath('cmr_query.yaml')
Expand Down Expand Up @@ -301,19 +302,42 @@ def test_track_ingest_publish_cnm(track_ingest_cnm_fixture):
"actual_feature_count": 0,
"status": "to_ingest"
}]
track.ENV = "sit"
track.ENV = "test"

vcr_cassette = pathlib.Path(os.path.dirname(os.path.realpath(__file__))) \
.joinpath('vcr_cassettes').joinpath('publish_cnm.yaml')
with vcr.use_cassette(vcr_cassette, decode_compressed_response=True):
track.publish_cnm_ingest(DEFAULT_ACCOUNT_ID)

sns_backend = sns_backends[DEFAULT_ACCOUNT_ID]["us-west-2"]
actual = json.loads(sns_backend.topics[f"arn:aws:sns:us-west-2:{DEFAULT_ACCOUNT_ID}:svc-hydrocron-sit-cnm-response"].sent_notifications[0][1])
actual = json.loads(sns_backend.topics[f"arn:aws:sns:us-west-2:{DEFAULT_ACCOUNT_ID}:svc-hydrocron-test-cnm-response"].sent_notifications[0][1])

expected_file = (pathlib.Path(os.path.dirname(os.path.realpath(__file__)))
.joinpath('test_data').joinpath('track_ingest_cnm_message.json'))
with open(expected_file) as jf:
expected = json.load(jf)

assert actual == expected

def test_track_ingest_mismatch():
"""Test cases where incorrect combination of shortname and table names are
passed to track ingest operations."""

import hydrocron.db.track_ingest

class LambdaContext:
def __init__(self):
self.invoked_function_arn = "arn:aws:lambda:us-west-2:12345678910:function:svc-hydrocron-sit-track-ingest-lambda"

event = {
"collection_shortname": "SWOT_L2_HR_LakeSP_prior_2.0",
"hydrocron_table": "hydrocron-swot-prior-lake-table",
"hydrocron_track_table": "hydrocron-swot-reach-track-ingest-table",
"temporal": "",
"query_start": "2024-09-05T23:00:00",
"query_end": "2024-09-05T23:59:59"
}
context = LambdaContext()
with pytest.raises(hydrocron.db.track_ingest.TableMisMatch) as e:
hydrocron.db.track_ingest.track_ingest_handler(event, context)
assert str(e.value) == "Error: Cannot query prior lake data for tables: 'hydrocron-swot-prior-lake-table' and 'hydrocron-swot-reach-track-ingest-table'"
42 changes: 21 additions & 21 deletions tests/vcr_cassettes/publish_cnm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ interactions:
uri: https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=SWOT_L2_HR_RiverSP_reach_2.0&readable_granule_name%5B%5D=SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip&options%5Breadable_granule_name%5D%5Bpattern%5D=true&page_size=0
response:
body:
string: '{"hits":1,"took":105,"items":[]}'
string: '{"hits":1,"took":95,"items":[]}'
headers:
Access-Control-Allow-Origin:
- '*'
Expand All @@ -24,19 +24,19 @@ interactions:
CMR-Hits:
- '1'
CMR-Request-Id:
- e3986cbe-e92f-4f66-9d3a-135846d23a3c
- 9f40c39a-12cc-4033-b67a-cb8fee448bc5
CMR-Took:
- '106'
- '95'
Connection:
- keep-alive
Content-MD5:
- efb24cae14e6d340937153b1c3fcd8e7
- 461384cf862923b25d050408730eacc0
Content-SHA1:
- 5f45a4e901063718301c91a36cdd34550fb0ca50
- c6ee7948f5b33a1d02a79d1cd64cecafb8a78d11
Content-Type:
- application/vnd.nasa.cmr.umm_results+json;version=1.6.6; charset=utf-8
Date:
- Thu, 10 Oct 2024 21:05:46 GMT
- Fri, 11 Oct 2024 20:42:19 GMT
Server:
- ServerTokens ProductOnly
Strict-Transport-Security:
Expand All @@ -46,23 +46,23 @@ interactions:
Vary:
- Accept-Encoding, User-Agent
Via:
- 1.1 27b5e48b551f58a537d4366486948250.cloudfront.net (CloudFront)
- 1.1 c4e50e26fcbefa29d8d1be685a1f4242.cloudfront.net (CloudFront)
X-Amz-Cf-Id:
- -dGCq9LYLoXF9oC5tqf6ZHugYJRfeqrk1uQw1IIpqRc8mTdbvzKVkA==
- OGrkgQzrwmJ_wu5tuaqCMCdg3rdW0DUZHyOoAZG4qphS18EuJmrEcA==
X-Amz-Cf-Pop:
- LAX54-P3
- LAX50-C1
X-Cache:
- Miss from cloudfront
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-Request-Id:
- -dGCq9LYLoXF9oC5tqf6ZHugYJRfeqrk1uQw1IIpqRc8mTdbvzKVkA==
- OGrkgQzrwmJ_wu5tuaqCMCdg3rdW0DUZHyOoAZG4qphS18EuJmrEcA==
X-XSS-Protection:
- 1; mode=block
content-length:
- '32'
- '31'
status:
code: 200
message: OK
Expand All @@ -81,7 +81,7 @@ interactions:
uri: https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=SWOT_L2_HR_RiverSP_reach_2.0&readable_granule_name%5B%5D=SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip&options%5Breadable_granule_name%5D%5Bpattern%5D=true&page_size=1
response:
body:
string: '{"hits":1,"took":74,"items":[{"meta":{"concept-type":"granule","concept-id":"G3232820892-POCLOUD","revision-id":1,"native-id":"SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01_swot","collection-concept-id":"C2799438303-POCLOUD","provider-id":"POCLOUD","format":"application/vnd.nasa.cmr.umm+json","revision-date":"2024-09-09T02:57:10.684Z"},"umm":{"TemporalExtent":{"RangeDateTime":{"EndingDateTime":"2024-09-05T23:31:35.589Z","BeginningDateTime":"2024-09-05T23:31:34.815Z"}},"GranuleUR":"SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01_swot","AdditionalAttributes":[{"Values":["246L","247L","248L","249L","250L","251L","252L","253L","254L","255L","256L","257L","258L","246R","247R","248R","249R","250R","251R","252R","253R","254R","255R","256R","257R","258R"],"Name":"TILE"}],"MeasuredParameters":[{"QAStats":{"QAPercentMissingData":0,"QAPercentOutOfBoundsData":0},"ParameterName":"N/A"}],"SpatialExtent":{"HorizontalSpatialDomain":{"Geometry":{"BoundingRectangles":[{"WestBoundingCoordinate":-130.8307934819651,"SouthBoundingCoordinate":52.93360004865338,"EastBoundingCoordinate":-125.58639150138737,"NorthBoundingCoordinate":58.82204893062929}]},"Track":{"Cycle":20,"Passes":[{"Pass":457,"Tiles":["246L","247L","248L","249L","250L","251L","252L","253L","254L","255L","256L","257L","258L","246R","247R","248R","249R","250R","251R","252R","253R","254R","255R","256R","257R","258R"]}]}}},"ProviderDates":[{"Type":"Insert","Date":"2024-09-09T01:25:25.739Z"},{"Type":"Update","Date":"2024-09-09T01:25:25.739Z"}],"CollectionReference":{"Version":"2.0","ShortName":"SWOT_L2_HR_RiverSP_reach_2.0"},"PGEVersionClass":{"PGEName":"PGE_L2_HR_RiverSP","PGEVersion":"5.0.4"},"RelatedUrls":[{"URL":"https://archive.swot.podaac.earthdata.nasa.gov/podaac-swot-ops-cumulus-public/SWOT_L2_HR_RiverSP_2.0/SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip.iso.xml","Description":"Download
string: '{"hits":1,"took":92,"items":[{"meta":{"concept-type":"granule","concept-id":"G3232820892-POCLOUD","revision-id":1,"native-id":"SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01_swot","collection-concept-id":"C2799438303-POCLOUD","provider-id":"POCLOUD","format":"application/vnd.nasa.cmr.umm+json","revision-date":"2024-09-09T02:57:10.684Z"},"umm":{"TemporalExtent":{"RangeDateTime":{"EndingDateTime":"2024-09-05T23:31:35.589Z","BeginningDateTime":"2024-09-05T23:31:34.815Z"}},"GranuleUR":"SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01_swot","AdditionalAttributes":[{"Values":["246L","247L","248L","249L","250L","251L","252L","253L","254L","255L","256L","257L","258L","246R","247R","248R","249R","250R","251R","252R","253R","254R","255R","256R","257R","258R"],"Name":"TILE"}],"MeasuredParameters":[{"QAStats":{"QAPercentMissingData":0,"QAPercentOutOfBoundsData":0},"ParameterName":"N/A"}],"SpatialExtent":{"HorizontalSpatialDomain":{"Geometry":{"BoundingRectangles":[{"WestBoundingCoordinate":-130.8307934819651,"SouthBoundingCoordinate":52.93360004865338,"EastBoundingCoordinate":-125.58639150138737,"NorthBoundingCoordinate":58.82204893062929}]},"Track":{"Cycle":20,"Passes":[{"Pass":457,"Tiles":["246L","247L","248L","249L","250L","251L","252L","253L","254L","255L","256L","257L","258L","246R","247R","248R","249R","250R","251R","252R","253R","254R","255R","256R","257R","258R"]}]}}},"ProviderDates":[{"Type":"Insert","Date":"2024-09-09T01:25:25.739Z"},{"Type":"Update","Date":"2024-09-09T01:25:25.739Z"}],"CollectionReference":{"Version":"2.0","ShortName":"SWOT_L2_HR_RiverSP_reach_2.0"},"PGEVersionClass":{"PGEName":"PGE_L2_HR_RiverSP","PGEVersion":"5.0.4"},"RelatedUrls":[{"URL":"https://archive.swot.podaac.earthdata.nasa.gov/podaac-swot-ops-cumulus-public/SWOT_L2_HR_RiverSP_2.0/SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip.iso.xml","Description":"Download
SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip.iso.xml","Type":"EXTENDED
METADATA"},{"URL":"s3://podaac-swot-ops-cumulus-public/SWOT_L2_HR_RiverSP_2.0/SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip.iso.xml","Description":"This
link provides direct download access via S3 to the granule","Type":"EXTENDED
Expand Down Expand Up @@ -114,21 +114,21 @@ interactions:
CMR-Hits:
- '1'
CMR-Request-Id:
- 21ec8088-e6d8-4bbe-b659-fd9a86915dc8
- f3bd451b-250e-464f-aca3-261d20a36ae4
CMR-Search-After:
- '["pocloud",1725579094815,3232820892]'
CMR-Took:
- '75'
- '93'
Connection:
- keep-alive
Content-MD5:
- 77046ee32e3289cf25ac89b8a8f2e431
- 544062937074509d57e9507aa95f44a3
Content-SHA1:
- dab6e5694fc519744720a256a21ea1c522f2218f
- 4df473f06f2f0b28ee0e328160064de80869bf4f
Content-Type:
- application/vnd.nasa.cmr.umm_results+json;version=1.6.6; charset=utf-8
Date:
- Thu, 10 Oct 2024 21:05:46 GMT
- Fri, 11 Oct 2024 20:42:20 GMT
Server:
- ServerTokens ProductOnly
Strict-Transport-Security:
Expand All @@ -138,19 +138,19 @@ interactions:
Vary:
- Accept-Encoding, User-Agent
Via:
- 1.1 e4cf17e75928ca088d52aecb26f1f1da.cloudfront.net (CloudFront)
- 1.1 92dd5512d5f290fe351674f3051d6d82.cloudfront.net (CloudFront)
X-Amz-Cf-Id:
- YohmjCui14QWvPVi2hcsgadZt5eKbVh6WQscK946z7WnDuV74O2BRg==
- n2l1zw3GXkohQmQItnRDXs5Rh3HPLrSzLvWEeWDpaCjBvUH5Dm7_fw==
X-Amz-Cf-Pop:
- LAX54-P3
- LAX50-C1
X-Cache:
- Miss from cloudfront
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-Request-Id:
- YohmjCui14QWvPVi2hcsgadZt5eKbVh6WQscK946z7WnDuV74O2BRg==
- n2l1zw3GXkohQmQItnRDXs5Rh3HPLrSzLvWEeWDpaCjBvUH5Dm7_fw==
X-XSS-Protection:
- 1; mode=block
content-length:
Expand Down

0 comments on commit 9a2c2a3

Please sign in to comment.