Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/issue 258 - Granules with large numbers of features cannot be loaded #259

Merged
merged 7 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Deprecated
### Removed
### Fixed
- Issue 258 - Granules with very large feature counts cannot be added to hydrocron
- Issue 235 - Track ingest table can be populated with granules that aren't loaded into Hydrocron
- Issue 248 - Track ingest operations need to query UAT for granule files if track ingest is running in SIT or UAT
### Security
Expand Down
21 changes: 9 additions & 12 deletions hydrocron/db/io/swot_shp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Unpacks SWOT Shapefiles
"""
import os.path
import json
import tempfile
from datetime import datetime, timezone
from importlib import resources
Expand Down Expand Up @@ -109,7 +108,7 @@ def handle_null_geometries(geodf):
geodf_no_nulls : geopandas.GeoDataFrame
the geodataframe with null geometries handled
"""

logging.info('Starting handle null geometries')
geodf['geometry'].fillna(
value=Polygon(constants.SWOT_PRIOR_LAKE_FILL_GEOMETRY_COORDS),
inplace=True)
Expand All @@ -131,6 +130,8 @@ def convert_polygon_to_centerpoint(geodf_polygon):
geodf_centerpoint : geopandas.GeoDataFrame
the geodataframe with point feature types and calculated centerpoint geometries
"""
logging.info('Starting convert polygon to centerpoint')

geodf_centerpoint = geodf_polygon
geodf_centerpoint['geometry'] = geodf_polygon['geometry'].centroid

Expand All @@ -152,6 +153,7 @@ def parse_metadata_from_shpxml(xml_elem):
metadata_attrs : dict
a dictionary of metadata attributes to add to record
"""
logging.info('Starting parse metadata from shpfile')
# get SWORD version
for globs in xml_elem.findall('global_attributes'):
prior_db_files = globs.find('xref_prior_river_db_files').text
Expand Down Expand Up @@ -191,18 +193,13 @@ def assemble_attributes(geodf, attributes):
attributes : dict
A dictionary of attributes to concatenate
"""
logging.info('Starting assemble attributes')

items = []
# rework to use dataframe instead of file as string
for _index, row in geodf.iterrows():

shp_attrs = json.loads(
row.to_json(default_handler=str))

item_attrs = shp_attrs | attributes

item_attrs = {key: str(item_attrs[key]) for key in item_attrs.keys()}
items.append(item_attrs)
geodf = geodf.astype(str)
geodf = geodf.assign(**attributes)
items = geodf.to_dict('records')

return items

Expand All @@ -222,7 +219,7 @@ def parse_from_filename(filename):
filename_attrs : dict
A dictionary of attributes from the filename
"""

logging.info('Starting parse attributes from filename')
filename_components = filename.split("_")

collection = ""
Expand Down
4 changes: 4 additions & 0 deletions hydrocron/db/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,12 @@ def load_data(dynamo_resource, table_name, items):
logging.info("Item %s: %s", feature_id, items[i][feature_id])
hydrocron_table.batch_fill_table(items)

logging.info("Finished loading %s items", len(items))

else:
logging.info("Adding %s items to table individually", feature_name)
for item_attrs in items:
logging.info("Item %s: %s", feature_id, item_attrs[feature_id])
hydrocron_table.add_data(**item_attrs)

logging.info("Finished loading %s items", len(items))
5 changes: 0 additions & 5 deletions hydrocron/db/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,6 @@ def batch_fill_table(self, items):
try:
with table.batch_writer() as writer:
for item in items:
logger.info(
"Item %s size: %s",
item[self.partition_key_name],
str(sys.getsizeof(item))
)
if sys.getsizeof(item) < 300000:
writer.put_item(Item=item)
else:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_data/api_query_results_csv.csv

Large diffs are not rendered by default.

Loading