Release/1.5.0 #262

Open

wants to merge 45 commits into main from release/1.5.0
Changes from all commits · 45 commits
5fa3d1c
/version 1.5.0a0
nikki-t Apr 22, 2024
5ab25a1
Fix after rebase (#241)
frankinspace Sep 26, 2024
5877298
/version 1.5.0a1
frankinspace Sep 26, 2024
2ae0817
Feature/issue 211 - Query track ingest table for granules with "to_in…
nikki-t Oct 3, 2024
8e768a1
/version 1.5.0a2
frankinspace Oct 3, 2024
01836e2
Feature/issue 212 - Update track ingest table with granule status (#228)
nikki-t Oct 3, 2024
e41dff0
/version 1.5.0a3
frankinspace Oct 3, 2024
83240f1
Feature/issue 203 - Construct CNM to trigger load data (#232)
nikki-t Oct 4, 2024
7a43c90
Feature/issue 236: Update track ingest to allow UAT query of CMR (#240)
nikki-t Oct 4, 2024
8aa3ec2
/version 1.5.0a4
nikki-t Oct 4, 2024
5f2774f
/version 1.6.0a0
nikki-t Oct 9, 2024
ad2909b
Merge branch 'main' of https://github.com/podaac/hydrocron into develop
nikki-t Oct 10, 2024
a57a870
/version 1.5.0a1
nikki-t Oct 10, 2024
9a2c2a3
Feature/issue 235 - Track ingest table can be populated with granules…
nikki-t Oct 17, 2024
a9331f8
/version 1.5.0a2
torimcd Oct 17, 2024
961c971
Update readme and changelog (#253)
torimcd Oct 17, 2024
7396ac1
/version 1.5.0a3
torimcd Oct 17, 2024
d551e51
Feature/issue 250 - Handle overlapping times with unique CRIDS (#257)
nikki-t Oct 31, 2024
57c7d6f
/version 1.5.0a4
torimcd Oct 31, 2024
c15774e
Feature/issue 258 - Granules with large numbers of features cannot be…
torimcd Oct 31, 2024
b76fc36
/version 1.5.0a5
torimcd Oct 31, 2024
55a96cc
1.5.0 release and unreleased section
nikki-t Nov 4, 2024
e0b235e
Update dependencies
nikki-t Nov 4, 2024
e5ac120
Provide better logging and debug logs
nikki-t Nov 8, 2024
8ec94f4
Increase track ingest and load granule lambda timeout and memory
nikki-t Nov 8, 2024
28691b3
Allow track ingest status query limit to prevent timeouts
nikki-t Nov 8, 2024
675339f
Handle granule UR product counter increments
nikki-t Nov 8, 2024
ae435d8
Fix linting for conditional check
nikki-t Nov 8, 2024
79b2b82
Test product counter cases
nikki-t Nov 8, 2024
dfe540f
Make DEBUG_LOG environment variable optional
nikki-t Nov 8, 2024
2af4192
Add new track ingest environment variables to test
nikki-t Nov 8, 2024
f86864f
/version 1.5.0rc1
nikki-t Nov 8, 2024
414ce3e
Handle cases where more than a single incremented product arrives
nikki-t Nov 11, 2024
6cb4674
Provide reprocessed_crid argument for track ingest table query
nikki-t Nov 11, 2024
be5fb34
Merge branch 'release/1.5.0' of https://github.com/podaac/hydrocron i…
nikki-t Nov 11, 2024
d4d96a7
/version 1.5.0rc2
nikki-t Nov 11, 2024
0835f92
Fix bug that exists when removing older product counters
nikki-t Nov 11, 2024
d5f54c7
Merge branch 'release/1.5.0' of https://github.com/podaac/hydrocron i…
nikki-t Nov 11, 2024
d4dd458
/version 1.5.0rc3
nikki-t Nov 11, 2024
966e354
Remove overlap between ingested and to_ingest
nikki-t Nov 11, 2024
be78883
Merge branch 'release/1.5.0' of https://github.com/podaac/hydrocron i…
nikki-t Nov 11, 2024
370f52c
/version 1.5.0rc4
nikki-t Nov 11, 2024
fc90d1c
Update collection start date for track ingest CMR queries eventbridge…
nikki-t Nov 13, 2024
900dba1
Merge branch 'release/1.5.0' of https://github.com/podaac/hydrocron i…
nikki-t Nov 13, 2024
00a40d0
/version 1.5.0rc5
nikki-t Nov 13, 2024
18 changes: 18 additions & 0 deletions CHANGELOG.md
@@ -13,6 +13,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html)
### Fixed
### Security

## [1.5.0]

### Added
- Issue 211 - Query track ingest table for granules with "to_ingest" status
- Issue 212 - Update track ingest table with granule status
- Issue 203 - Construct CNM to trigger load data operations and ingest granule
- Issue 236 - Allow UAT query of CMR to support querying in different venues
- Issue 250 - Handle overlapping times with unique CRIDS
### Changed
- Issue 251 - Add note to readme to point to documentation
### Deprecated
### Removed
### Fixed
- Issue 258 - Granules with very large feature counts cannot be added to hydrocron
- Issue 235 - Track ingest table can be populated with granules that aren't loaded into Hydrocron
- Issue 248 - Track ingest operations need to query UAT for granule files if track ingest is running in SIT or UAT
### Security

## [1.4.1]

### Added
12 changes: 7 additions & 5 deletions README.md
@@ -1,10 +1,12 @@
## Overview

Hydrocron API is a new tool that implements functionalities that will allow
hydrologists to have direct access to filtered data from our newest satellites.
This innovative tool will provide an effortless way to filter data by feature ID,
date range, polygonal area, and more. This data will be returned in formats such
as CSV and geoJSON.
Hydrocron is an API that repackages hydrology datasets from the Surface Water and Ocean Topography (SWOT) satellite into formats that make time-series analysis easier, including GeoJSON and CSV. To use Hydrocron, see the official documentation with examples and tutorials here: [https://podaac.github.io/hydrocron/intro.html](https://podaac.github.io/hydrocron/intro.html)

The following sections of this readme describe how to install and run a development version of Hydrocron locally on your own computer. This is not recommended if you just want to access SWOT data through Hydrocron. To access data, see the documentation linked above.

To contribute to the development of Hydrocron, see the [contributing guidelines](https://github.com/podaac/hydrocron/blob/develop/CONTRIBUTING.md) and browse the open issues.

***NOTE: the following instructions for installing and running a local version of Hydrocron are out of date, and may result in a broken install. We are aware of the issue and working on restoring local development functionality. Please open a new issue or ask a question on the [PO.DAAC forum](https://podaac.jpl.nasa.gov/forum/viewforum.php?f=6) if you need to run a local installation.***

## Requirements

82 changes: 82 additions & 0 deletions hydrocron/api/data_access/db.py
@@ -91,6 +91,44 @@ def _query_hydrocron_table(self, hydrocron_table: str, key_condition_expression:

        return items

    def get_series_granule_ur(self, table_name, feature_name, granule_ur):
        """
        Query a Hydrocron table for all items matching a granule UR,
        following DynamoDB pagination until every page is collected.

        @param table_name: str - Hydrocron table to query
        @param feature_name: str - Feature field to project in the results
        @param granule_ur: str - Granule UR
        @return: list of items
        """

        hydrocron_table = self._dynamo_instance.Table(table_name)
        hydrocron_table.load()

        items = hydrocron_table.query(
            ProjectionExpression=feature_name,
            IndexName="GranuleURIndex",
            KeyConditionExpression=(
                Key("granuleUR").eq(granule_ur)
            )
        )
        last_key_evaluated = ""
        if "LastEvaluatedKey" in items.keys():
            last_key_evaluated = items["LastEvaluatedKey"]

        while last_key_evaluated:
            next_items = hydrocron_table.query(
                ExclusiveStartKey=last_key_evaluated,
                ProjectionExpression=feature_name,
                IndexName="GranuleURIndex",
                KeyConditionExpression=(
                    Key("granuleUR").eq(granule_ur)
                )
            )
            items["Items"].extend(next_items["Items"])
            last_key_evaluated = ""
            if "LastEvaluatedKey" in next_items.keys():
                last_key_evaluated = next_items["LastEvaluatedKey"]

        return items["Items"]

    def get_granule_ur(self, table_name, granule_ur):
        """

@@ -111,3 +149,47 @@ def get_granule_ur(self, table_name, granule_ur):
            )
        )
        return items

    def get_status(self, table_name, status, limit=None):
        """
        Query a Hydrocron track ingest table for items with a given status.

        @param table_name: str - Hydrocron table to query
        @param status: str - Status to query for
        @param limit: int - If set, return at most this many items
        @return: list of items
        """

        hydrocron_table = self._dynamo_instance.Table(table_name)
        items = hydrocron_table.query(
            IndexName="statusIndex",
            KeyConditionExpression=(Key("status").eq(status))
        )

        if limit and items["Count"] >= limit:
            items["Items"] = items["Items"][:limit]
            if "LastEvaluatedKey" in items.keys():
                items.pop("LastEvaluatedKey")

        last_key_evaluated = ""
        if "LastEvaluatedKey" in items.keys():
            last_key_evaluated = items["LastEvaluatedKey"]

        while last_key_evaluated:
            next_items = hydrocron_table.query(
                ExclusiveStartKey=last_key_evaluated,
                IndexName="statusIndex",
                KeyConditionExpression=(Key("status").eq(status))
            )

            items["Items"].extend(next_items["Items"])
            if limit and len(items["Items"]) >= limit:
                items["Items"] = items["Items"][:limit]
                break

            last_key_evaluated = ""
            if "LastEvaluatedKey" in next_items.keys():
                last_key_evaluated = next_items["LastEvaluatedKey"]

        if limit and len(items["Items"]) >= limit:
            items["Items"] = items["Items"][:limit]

        return items["Items"]
21 changes: 9 additions & 12 deletions hydrocron/db/io/swot_shp.py
@@ -2,7 +2,6 @@
Unpacks SWOT Shapefiles
"""
import os.path
import json
import tempfile
from datetime import datetime, timezone
from importlib import resources
@@ -109,7 +108,7 @@ def handle_null_geometries(geodf):
    geodf_no_nulls : geopandas.GeoDataFrame
        the geodataframe with null geometries handled
    """

    logging.info('Starting handle null geometries')
    geodf['geometry'].fillna(
        value=Polygon(constants.SWOT_PRIOR_LAKE_FILL_GEOMETRY_COORDS),
        inplace=True)
@@ -131,6 +130,8 @@ def convert_polygon_to_centerpoint(geodf_polygon):
    geodf_centerpoint : geopandas.GeoDataFrame
        the geodataframe with point feature types and calculated centerpoint geometries
    """
    logging.info('Starting convert polygon to centerpoint')

    geodf_centerpoint = geodf_polygon
    geodf_centerpoint['geometry'] = geodf_polygon['geometry'].centroid

@@ -152,6 +153,7 @@ def parse_metadata_from_shpxml(xml_elem):
    metadata_attrs : dict
        a dictionary of metadata attributes to add to record
    """
    logging.info('Starting parse metadata from shpfile')
    # get SWORD version
    for globs in xml_elem.findall('global_attributes'):
        prior_db_files = globs.find('xref_prior_river_db_files').text
@@ -191,18 +193,13 @@ def assemble_attributes(geodf, attributes):
    attributes : dict
        A dictionary of attributes to concatenate
    """
    logging.info('Starting assemble attributes')

    items = []
    # rework to use dataframe instead of file as string
    for _index, row in geodf.iterrows():

        shp_attrs = json.loads(
            row.to_json(default_handler=str))

        item_attrs = shp_attrs | attributes

        item_attrs = {key: str(item_attrs[key]) for key in item_attrs.keys()}
        items.append(item_attrs)
    geodf = geodf.astype(str)
    geodf = geodf.assign(**attributes)
    items = geodf.to_dict('records')

    return items

@@ -222,7 +219,7 @@ def parse_from_filename(filename):
    filename_attrs : dict
        A dictionary of attributes from the filename
    """

    logging.info('Starting parse attributes from filename')
    filename_components = filename.split("_")

    collection = ""
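The `assemble_attributes` rework above replaces the per-row `json.loads(row.to_json(...))` round-trip with three vectorized pandas calls. A hedged sketch of the new pattern on toy data (the reach IDs and attribute values below are made up):

```python
# Demonstrates the vectorized attribute assembly: stringify every column,
# broadcast the shared granule-level attributes, emit one dict per feature.
import geopandas as gpd
from shapely.geometry import Point

geodf = gpd.GeoDataFrame(
    {"reach_id": [71224100223, 71224100224], "wse": [12.5, 13.1]},
    geometry=[Point(-95.2, 29.7), Point(-95.3, 29.8)],
)
attributes = {"sword_version": "v16", "collection": "SWOT_L2_HR_RiverSP_2.0"}

geodf = geodf.astype(str)           # matches the old per-value str() casts
geodf = geodf.assign(**attributes)  # attach the same metadata to every row
items = geodf.to_dict("records")    # list of per-feature attribute dicts

print(items[0]["reach_id"], items[0]["sword_version"])  # 71224100223 v16
```

Dropping the row-by-row iteration is also what makes the now-unused `import json` at the top of the module removable.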
7 changes: 7 additions & 0 deletions hydrocron/db/load_data.py
@@ -133,6 +133,9 @@ def granule_handler(event, _):
if ("LakeSP_Prior" in granule_path) & (table_name != constants.SWOT_PRIOR_LAKE_TABLE_NAME):
raise TableMisMatch(f"Error: Cannot load Prior Lake data into table: '{table_name}'")

if ("LakeSP_Obs" in granule_path) | ("LakeSP_Unassigned" in granule_path):
raise TableMisMatch(f"Error: Cannot load Observed or Unassigned Lake data into table: '{table_name}'")

logging.info("Value of load_benchmarking_data is: %s", load_benchmarking_data)

obscure_data = "true" in os.getenv("OBSCURE_DATA").lower()
@@ -362,8 +365,12 @@ def load_data(dynamo_resource, table_name, items):
logging.info("Item %s: %s", feature_id, items[i][feature_id])
hydrocron_table.batch_fill_table(items)

logging.info("Finished loading %s items", len(items))

else:
logging.info("Adding %s items to table individually", feature_name)
for item_attrs in items:
logging.info("Item %s: %s", feature_id, item_attrs[feature_id])
hydrocron_table.add_data(**item_attrs)

logging.info("Finished loading %s items", len(items))
5 changes: 0 additions & 5 deletions hydrocron/db/schema.py
@@ -115,11 +115,6 @@ def batch_fill_table(self, items):
        try:
            with table.batch_writer() as writer:
                for item in items:
                    logger.info(
                        "Item %s size: %s",
                        item[self.partition_key_name],
                        str(sys.getsizeof(item))
                    )
                    if sys.getsizeof(item) < 300000:
                        writer.put_item(Item=item)
                    else:
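In `batch_fill_table`, the per-item size logging that ran inside the batch-writer loop for every feature is removed, while the size guard itself stays. The `else` branch is collapsed in this diff; the sketch below assumes it logs and skips the oversized item, and the names are illustrative:

```python
# Sketch of the remaining batch-write path: items are size-checked before
# writing because DynamoDB rejects items larger than 400 KB; the module's
# 300000-byte cut-off is a conservative heuristic below that hard limit.
import sys
import logging

logger = logging.getLogger(__name__)
MAX_ITEM_BYTES = 300000

def batch_fill_table(table, items, partition_key_name="reach_id"):
    with table.batch_writer() as writer:
        for item in items:
            # Note: sys.getsizeof measures the dict container, not the
            # serialized payload, so this is only an approximation.
            if sys.getsizeof(item) < MAX_ITEM_BYTES:
                writer.put_item(Item=item)
            else:
                logger.warning("Skipping oversized item %s",
                               item[partition_key_name])
```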