From 7e926fb78c696c0f80f7891e83e591bfdac4acb9 Mon Sep 17 00:00:00 2001 From: vggonzal <9Tcostoamm> Date: Thu, 17 Aug 2023 14:44:38 -0700 Subject: [PATCH] clean up --- .github/workflows/build.yml | 12 +- hydrocronapi/controllers/db/db.py | 68 ++-- .../controllers/db/db_local.py | 8 + .../controllers/db/hydrocron_database.py | 9 +- hydrocronapi/controllers/subset.py | 30 +- hydrocronapi/controllers/timeseries.py | 36 +-- poetry.lock | 84 ++++- pyproject.toml | 1 + tests/example_load_data.py | 48 ++- tests/hydrocron_database.py | 295 ------------------ tests/test_hydrocron_database.py | 182 ----------- 11 files changed, 209 insertions(+), 564 deletions(-) rename tests/db.py => hydrocronapi/controllers/db/db_local.py (92%) delete mode 100644 tests/hydrocron_database.py delete mode 100644 tests/test_hydrocron_database.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7db8701..3811977 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -207,12 +207,12 @@ jobs: path: dist/*.zip - name: Deploy Terraform - if: | - github.ref == 'refs/heads/develop' || - github.ref == 'refs/heads/main' || - startsWith(github.ref, 'refs/heads/release') || - github.event.head_commit.message == '/deploy sit' || - github.event.head_commit.message == '/deploy uat' + #if: | + # github.ref == 'refs/heads/develop' || + # github.ref == 'refs/heads/main' || + # startsWith(github.ref, 'refs/heads/release') || + # github.event.head_commit.message == '/deploy sit' || + # github.event.head_commit.message == '/deploy uat' working-directory: terraform/ env: AWS_ACCESS_KEY_ID: ${{ secrets[format('AWS_ACCESS_KEY_ID_SERVICES_{0}', env.TARGET_ENV_UPPERCASE)] }} diff --git a/hydrocronapi/controllers/db/db.py b/hydrocronapi/controllers/db/db.py index 46a8f34..1d565b2 100644 --- a/hydrocronapi/controllers/db/db.py +++ b/hydrocronapi/controllers/db/db.py @@ -1,59 +1,92 @@ +# pylint: disable=W0613 +"""Module to interface with DynamoDB database.""" + + import 
logging from datetime import datetime from typing import Generator import boto3 -from .hydrocron_database import Hydrocron_DB -from boto3.dynamodb.conditions import Key dynamodb = boto3.client('dynamodb') logger = logging.getLogger() + def get_reach_series(start_time: datetime, end_time: datetime) -> Generator: - table_name = 'hydrocron_swot_reaches_test' + """Get Timeseries for a particular Reach filtering by time range + + :param start_time: Start time of the timeseries + :type start_time: str + :param end_time: End time of the timeseries + :type end_time: str + + :rtype: None + """ + response = dynamodb.get_item( - TableName=table_name, + TableName='hydrocron_swot_reaches_test', Key={ 'reach_id': {'S': '71224100223'} } ) - print("get_item") - print(response) return response + def get_node_series(start_time: datetime, end_time: datetime) -> Generator: - table_name = 'hydrocron_swot_reaches_test' + """Get Timeseries for a particular Node filtering by time range + + :param start_time: Start time of the timeseries + :type start_time: str + :param end_time: End time of the timeseries + :type end_time: str + + :rtype: None + """ + response = dynamodb.get_item( - TableName=table_name, + TableName='hydrocron_swot_reaches_test', Key={ 'node_id': {'S': '71224100223'} } ) - print("get_item") - print(response) return response def get_reach_series_by_feature_id(feature_id: str, start_time: datetime, end_time: datetime) -> Generator: + """Get Timeseries for a particular Reach filtering by ID and time range - #st = float(time.mktime(start_time.timetuple()) - 946710000) - #et = float(time.mktime(end_time.timetuple()) - 946710000) + :param feature_id: Identifier of the feature + :type feature_id: str + :param start_time: Start time of the timeseries + :type start_time: str + :param end_time: End time of the timeseries + :type end_time: str + + :rtype: None + """ - #select * from reach where reach_id like %(feature_id)s and cast(time as float) >= %(start_time)s and cast(time as 
float) <= %(end_time)s""", - table_name = 'hydrocron_swot_reaches_test' response = dynamodb.get_item( - TableName=table_name, + TableName='hydrocron_swot_reaches_test', Key={ 'reach_id': {'S': feature_id} } ) - print("get_item") - print(response) return response def get_node_series_by_feature_id(feature_id, start_time, end_time): + """Get Timeseries for a particular Node filtering by ID and time range + + :param feature_id: Identifier of the feature + :type feature_id: str + :param start_time: Start time of the timeseries + :type start_time: str + :param end_time: End time of the timeseries + :type end_time: str + + :rtype: None + """ table_name = 'hydrocron_swot_reaches_test' response = dynamodb.get_item( TableName=table_name, @@ -61,5 +94,4 @@ def get_node_series_by_feature_id(feature_id, start_time, end_time): 'node_id': {'S': feature_id} } ) - print(response) return response diff --git a/tests/db.py b/hydrocronapi/controllers/db/db_local.py similarity index 92% rename from tests/db.py rename to hydrocronapi/controllers/db/db_local.py index 36f91bf..215719f 100644 --- a/tests/db.py +++ b/hydrocronapi/controllers/db/db_local.py @@ -1,3 +1,10 @@ +# pylint: disable=W0613 +# pylint: disable=C0116 +# pylint: disable=C0411 +# pylint: disable=W0511 +"""Module to interface with DynamoDB database.""" + + import logging from datetime import datetime from typing import Generator @@ -26,6 +33,7 @@ dynamo_instance = Hydrocron_DB(dyn_resource=dyndb_resource) + def get_reach_series(start_time: datetime, end_time: datetime) -> Generator: hydrocron_table = dynamo_instance.load_table('hydrocron_swot_reaches_test') items = hydrocron_table.query(KeyConditionExpression=Key('reach_id').eq('71224100223')) diff --git a/hydrocronapi/controllers/db/hydrocron_database.py b/hydrocronapi/controllers/db/hydrocron_database.py index 9c6e3b4..f0821ea 100644 --- a/hydrocronapi/controllers/db/hydrocron_database.py +++ b/hydrocronapi/controllers/db/hydrocron_database.py @@ -1,3 +1,9 @@ +# pylint: 
disable=R1720 +# pylint: disable=C0121 +# pylint: disable=W0612 +# pylint: disable=C0103 +# pylint: disable=C0115 +# pylint: disable=C0114 import logging from botocore.exceptions import ClientError from boto3.dynamodb.conditions import Key @@ -220,7 +226,6 @@ def add_data(self, **kwargs): for key, value in kwargs.items(): item_dict[key] = value - item_id = item_dict[self.partition_key_name] try: self.table.put_item( Item=item_dict @@ -249,7 +254,7 @@ def run_query(self, partition_key, sort_key=None): The item. """ - if sort_key == None: + if sort_key is None: try: response = self.table.query( diff --git a/hydrocronapi/controllers/subset.py b/hydrocronapi/controllers/subset.py index 410a21c..3ffb8ea 100644 --- a/hydrocronapi/controllers/subset.py +++ b/hydrocronapi/controllers/subset.py @@ -1,8 +1,6 @@ # pylint: disable=duplicate-code # pylint: disable=R1702 -# pylint: disable=C0114 -# pylint: disable=E0401 -# pylint: disable=W0613 +"""Module defining Lambda workflow for subset endpoint.""" import json import logging @@ -12,7 +10,7 @@ from shapely import Polygon, Point -import hydrocronapi.controllers.db.db as db +from hydrocronapi.controllers.db import db logger = logging.getLogger() @@ -152,7 +150,7 @@ def format_subset_csv(results: Generator, polygon, fields): """ # Fetch all results from query - results = results['Items'] + results = results['Item'] data = {} @@ -185,16 +183,10 @@ def format_subset_csv(results: Generator, polygon, fields): return csv - -def lambda_handler(event, context): +def lambda_handler(event): """ This function queries the database for relevant results """ - print("test timeseries 3") - print("body") - print(event['body']) - print("feature") - print(event['body']['feature']) feature = event['body']['feature'] subsetpolygon = event['body']['subsetpolygon'] @@ -213,15 +205,13 @@ def lambda_handler(event, context): data['time'] = str(10) + " ms." 
data['hits'] = 10 - data['search on'] = dict( - parameter="identifier", - exact="exact", - page_number=0, - page_size=20 - ) + data['search on'] = { + "parameter": "identifier", + "exact": "exact", + "page_number": 0, + "page_size": 20 + } data['results'] = results return data - - diff --git a/hydrocronapi/controllers/timeseries.py b/hydrocronapi/controllers/timeseries.py index dd2d042..0975005 100644 --- a/hydrocronapi/controllers/timeseries.py +++ b/hydrocronapi/controllers/timeseries.py @@ -1,13 +1,12 @@ # pylint: disable=duplicate-code -# pylint: disable=C0114 -# pylint: disable=W0613 +"""Module defining Lambda workflow for timeseries endpoint.""" import logging import time from datetime import datetime from typing import Generator -import hydrocronapi.controllers.db.db as db +from hydrocronapi.controllers.db import db logger = logging.getLogger() @@ -39,8 +38,6 @@ def gettimeseries_get(feature, feature_id, start_time, end_time, output, fields) end_time = datetime.strptime(end_time, "%Y-%m-%d %H:%M:%S") start = time.time() - print("before db") - print(feature.lower()) if feature.lower() == 'reach': results = db.get_reach_series_by_feature_id(feature_id, start_time, end_time) elif feature.lower() == 'node': results = db.get_node_series_by_feature_id(feature_id, start_time, end_time) @@ -72,10 +69,7 @@ def format_json(results: Generator, feature_id, elapsed_time): """ # Fetch all results - print("RESULTS") res = results['Item'] - print(results) - print(res) data = {} @@ -93,7 +87,6 @@ def format_json(results: Generator, feature_id, elapsed_time): i = 0 if res['time'] != '-999999999999': # and (res['width'] != '-999999999999')): - print(feature_id) feature = {'properties': {}, 'geometry': {}, 'type': "Feature"} feature['geometry']['coordinates'] = [] feature_type = '' @@ -122,7 +115,6 @@ def format_json(results: Generator, feature_id, elapsed_time): feature['properties']['slope'] = float(res['slope']['S']) data['features'].append(feature) - print(data) data['hits'] = i return data @@ -142,7 +134,7 @@ def format_csv(results: Generator, 
feature_id, fields): """ # Fetch all results - results = results['Items'] + results = results['Item'] data = {} @@ -152,7 +144,6 @@ def format_csv(results: Generator, feature_id, fields): data['error'] = f'413: Query exceeds 6MB with {len(results)} hits.' else: - # csv = "feature_id, time_str, wse, geometry\n" csv = fields + '\n' fields_set = fields.split(", ") for res in results: @@ -174,15 +165,10 @@ def format_csv(results: Generator, feature_id, fields): return csv -def lambda_handler(event, context): +def lambda_handler(event): """ This function queries the database for relevant results """ - print("test timeseries 3") - print("body") - print(event['body']) - print("feature") - print(event['body']['feature']) feature = event['body']['feature'] feature_id = event['body']['reach_id'] @@ -201,15 +187,13 @@ def lambda_handler(event, context): data['time'] = str(10) + " ms." data['hits'] = 10 - data['search on'] = dict( - parameter="identifier", - exact="exact", - page_number=0, - page_size=20 - ) + data['search on'] = { + "parameter": "identifier", + "exact": "exact", + "page_number": 0, + "page_size": 20 + } data['results'] = results return data - - diff --git a/poetry.lock b/poetry.lock index 52bdb14..3644986 100644 --- a/poetry.lock +++ b/poetry.lock @@ -19,6 +19,44 @@ wrapt = [ {version = ">=1.14,<2", markers = "python_version >= \"3.11\""}, ] +[[package]] +name = "boto3" +version = "1.28.29" +description = "The AWS SDK for Python" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "boto3-1.28.29-py3-none-any.whl", hash = "sha256:7b8e7deee9f665612b3cd7412989aaab0337d8006a0490a188c814af137bd32d"}, + {file = "boto3-1.28.29.tar.gz", hash = "sha256:1ab375c231547db4c9ce760e29cbe90d0341fe44910571b1bc4967a72fd8276f"}, +] + +[package.dependencies] +botocore = ">=1.31.29,<1.32.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.6.0,<0.7.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.31.29" 
+description = "Low-level, data-driven core of boto 3." +optional = false +python-versions = ">= 3.7" +files = [ + {file = "botocore-1.31.29-py3-none-any.whl", hash = "sha256:d3dc422491b3a30667f188f3434541a1dd86d6f8ed7f98ca26e056ae7d912c85"}, + {file = "botocore-1.31.29.tar.gz", hash = "sha256:71b335a47ee061994ac12f15ffe63c5c783cb055dc48079b7d755a46b9c1918c"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = ">=1.25.4,<1.27" + +[package.extras] +crt = ["awscrt (==0.16.26)"] + [[package]] name = "colorama" version = "0.4.6" @@ -102,6 +140,17 @@ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib" plugins = ["setuptools"] requirements-deprecated-finder = ["pip-api", "pipreqs"] +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + [[package]] name = "lazy-object-proxy" version = "1.9.0" @@ -379,6 +428,23 @@ files = [ {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, ] +[[package]] +name = "s3transfer" +version = "0.6.2" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "s3transfer-0.6.2-py3-none-any.whl", hash = "sha256:b014be3a8a2aab98cfe1abc7229cc5a9a0cf05eb9c1f2b86b230fd8df3f78084"}, + {file = "s3transfer-0.6.2.tar.gz", hash = "sha256:cab66d3380cca3e70939ef2255d01cd8aece6a4907a9528740f668c4b0611861"}, +] + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + [[package]] name = "shapely" version = "2.0.1" @@ -477,6 +543,22 @@ files = [ {file = 
"typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, ] +[[package]] +name = "urllib3" +version = "1.26.16" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "urllib3-1.26.16-py2.py3-none-any.whl", hash = "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f"}, + {file = "urllib3-1.26.16.tar.gz", hash = "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + [[package]] name = "wrapt" version = "1.15.0" @@ -564,4 +646,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "17b3dac68af318c1ec29ee03ebf86915ada658c0d1436c8e61f6fb890b2e8a6a" +content-hash = "e32ba106ba817bdb6ccb5e912d6da514ec4b1d6fbe594a4e3af0c54f0bbbdf36" diff --git a/pyproject.toml b/pyproject.toml index 459ead7..85af1a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ packages = [ python = "^3.9" pandas = "^1.4.1" shapely = "^2.0.1" +boto3 = "^1.28.18" [tool.poetry.dev-dependencies] pytest = "^7.2" diff --git a/tests/example_load_data.py b/tests/example_load_data.py index 9a4e698..0ad67d6 100644 --- a/tests/example_load_data.py +++ b/tests/example_load_data.py @@ -1,21 +1,41 @@ +#!/usr/bin/env python +# coding: utf-8 + import boto3 +import json +from decimal import Decimal import geopandas as gpd -dynamodb = boto3.client('dynamodb') +from hydrocronapi.controllers.db.hydrocron_database import Hydrocron_DB + +session = boto3.session.Session(aws_access_key_id='a', + aws_secret_access_key='a', + 
aws_session_token='fake_session_token', + region_name='us-west-2') + +dyndb_resource = session.resource('dynamodb', endpoint_url='http://localhost:8000') + +dynamo_instance = Hydrocron_DB(dyn_resource=dyndb_resource) + + +table_name = 'hydrocron_swot_reaches_test' +test_shapefile_path = 'tests/data/SWOT_L2_HR_RiverSP_Reach_548_011_NA_20230610T193337_20230610T193344_PIA1_01/SWOT_L2_HR_RiverSP_Reach_548_011_NA_20230610T193337_20230610T193344_PIA1_01.shp' + +if dynamo_instance.table_exists(table_name): + dynamo_instance.delete_table(table_name) + +hydrocron_reach_table = dynamo_instance.create_table(table_name, + partition_key='reach_id', partition_key_type='S', + sort_key='time', sort_key_type='N') + +shp_file = gpd.read_file(test_shapefile_path) +item_attrs = {} +for index, row in shp_file.iterrows(): + item_attrs = json.loads(row.to_json(default_handler=str), parse_float=Decimal) -def upload(): - test_shapefile_path = 'tests/data/SWOT_L2_HR_RiverSP_Reach_548_011_NA_20230610T193337_20230610T193344_PIA1_01/SWOT_L2_HR_RiverSP_Reach_548_011_NA_20230610T193337_20230610T193344_PIA1_01.shp' - shp_file = gpd.read_file(test_shapefile_path) + hydrocron_reach_table.add_data(**item_attrs) - for index, row in shp_file.iterrows(): - object = {} - for k, v in row.items(): - object[k] = {'S' : str(v)} - response = dynamodb.put_item( - TableName='hydrocron_swot_reaches_test', - Item=object - ) - print(response) +items = hydrocron_reach_table.run_query(partition_key='71224100223') +print(items) -upload() diff --git a/tests/hydrocron_database.py b/tests/hydrocron_database.py deleted file mode 100644 index 9c6e3b4..0000000 --- a/tests/hydrocron_database.py +++ /dev/null @@ -1,295 +0,0 @@ -import logging -from botocore.exceptions import ClientError -from boto3.dynamodb.conditions import Key - -logger = logging.getLogger(__name__) - - -class Hydrocron_DB: - def __init__(self, dyn_resource): - """ - Parameters - ----------- - dyn_resource : boto3.session.resource('dynamodb') - A Boto3 
DynamoDB resource. - - """ - self.dyn_resource = dyn_resource - self.tables = [] - - def table_exists(self, table_name): - """ - Determines whether a table exists. If table exists, load it. - - Parameters - ---------- - table_name : string - The name of the table to check. - - Returns - ------- - boolean - True when the table exists; otherwise, False. - """ - try: - table = self.dyn_resource.Table(table_name) - table.load() - exists = True - self.tables.append(table_name) - except ClientError as err: - if err.response['Error']['Code'] == 'ResourceNotFoundException': - exists = False - else: - logger.error( - "Couldn't check for existence of %s. %s: %s", - table_name, - err.response['Error']['Code'], err.response['Error']['Message']) - raise - - return exists - - def create_table(self, table_name, partition_key, partition_key_type, sort_key, sort_key_type): - """ - Creates an Amazon DynamoDB table to store SWOT River Reach, Node, or Lake data for the Hydrocron API. - - Parameters - --------- - table_name : string - The name of the table to create. - - Returns - ------- - dict - The newly created table. - """ - try: - new_table = Hydrocron_Table(self.dyn_resource, - table_name, - partition_key, partition_key_type, - sort_key, sort_key_type) - - self.tables.append(new_table.table_name) - - except ClientError as err: - logger.error( - "Couldn't create table %s. %s: %s", table_name, - err.response['Error']['Code'], err.response['Error']['Message']) - raise - else: - return new_table - - def load_table(self, table_name): - """ - Loads an Amazon DynamoDB table - - Parameters - --------- - table_name : string - The name of the table to create. - - Returns - ------- - dict - The newly created table. - """ - try: - table = self.dyn_resource.Table(table_name) - table.load() - - except ClientError as err: - logger.error( - "Couldn't load table %s. 
%s: %s", table_name, - err.response['Error']['Code'], err.response['Error']['Message']) - raise - else: - return table - - def list_tables(self): - """ - Lists the Amazon DynamoDB tables for the current account. - - Returns - ------- - list - The list of tables. - """ - try: - for table in self.dyn_resource.tables.all(): - print(table.name) - - except ClientError as err: - logger.error( - "Couldn't list tables. %s: %s", - err.response['Error']['Code'], err.response['Error']['Message']) - raise - else: - return self.tables - - def delete_table(self, table_name): - """ - Deletes the table. - """ - try: - table = self.dyn_resource.Table(table_name) - table.delete() - table.wait_until_not_exists() - - self.tables = [x for x in self.tables if x is not table_name] - - except ClientError as err: - logger.error( - "Couldn't delete table. %s: %s", - err.response['Error']['Code'], err.response['Error']['Message']) - raise - - -class Hydrocron_Table: - def __init__(self, dyn_resource, - table_name, - partition_key_name, partition_key_type, - sort_key_name, sort_key_type): - """ - Parameters - ----------- - dyn_resource : boto3.session.resource('dynamodb') - A Boto3 DynamoDB resource. - table_name : string - The name of the table to create. - partition_key_name : string - the name of the partition key - partition_key_type : string - the type of the partition key - sort_key_name : string - the name of the sort key, usually time - sort_key_type: string - the type of the sort key. - - - """ - self.dyn_resource = dyn_resource - self.table_name = table_name - self.partition_key_name = partition_key_name - self.partition_key_type = partition_key_type - self.sort_key_name = sort_key_name - self.sort_key_type = sort_key_type - - self.table = self.create_table() - - def create_table(self): - """ - Creates an Amazon DynamoDB table to store SWOT River Reach, Node, or Lake data for the Hydrocron API. - - - Returns - ------- - dict - The newly created table. 
- """ - try: - self.table = self.dyn_resource.create_table( - TableName=self.table_name, - KeySchema=[ - {'AttributeName': self.partition_key_name, 'KeyType': 'HASH'}, # Partition key - {'AttributeName': self.sort_key_name, 'KeyType': 'RANGE'} # Sort key - ], - AttributeDefinitions=[ - {'AttributeName': self.partition_key_name, 'AttributeType': self.partition_key_type}, - {'AttributeName': self.sort_key_name, 'AttributeType': self.sort_key_type} - ], - ProvisionedThroughput={'ReadCapacityUnits': 10, 'WriteCapacityUnits': 10}) - self.table.wait_until_exists() - except ClientError as err: - logger.error( - "Couldn't create table %s. %s: %s", self.table_name, - err.response['Error']['Code'], err.response['Error']['Message']) - raise - else: - return self.table - - def add_data(self, **kwargs): - """ - Adds a data item to the table. - - Parameters - --------- - **kwargs: All attributes to add to the item. Must include partition and sort keys - """ - - item_dict = {} - - for key, value in kwargs.items(): - item_dict[key] = value - - item_id = item_dict[self.partition_key_name] - try: - self.table.put_item( - Item=item_dict - ) - except ClientError as err: - logger.error( - "Couldn't add item %s to table %s. Here's why: %s: %s", - self.partition_key_name, self.table.name, - err.response['Error']['Code'], err.response['Error']['Message']) - raise - - def run_query(self, partition_key, sort_key=None): - """ - Perform a query for multiple items. - - Parameters - ---------- - partition_key : string - the feature id to query - sort_key : integer - the value of the sort keys to query - - - Returns - ------- - The item. 
- - """ - if sort_key == None: - - try: - response = self.table.query( - KeyConditionExpression=(Key(self.partition_key_name).eq(partition_key))) - except ClientError as err: - logger.error( - "Couldn't query for items: %s: %s", - err.response['Error']['Code'], err.response['Error']['Message']) - raise - else: - return response['Items'] - - else: - try: - response = self.table.query( - KeyConditionExpression=(Key(self.partition_key_name).eq(partition_key) & - Key(self.sort_key_name).eq(sort_key))) - except ClientError as err: - logger.error( - "Couldn't query for items: %s: %s", - err.response['Error']['Code'], err.response['Error']['Message']) - raise - else: - return response['Items'] - - def delete_item(self, partition_key, sort_key): - """ - Deletes an item from the table. - - Parameters - ---------- - partition_key: string - The ID of the item to delete. - sort_key: string - The timestamp of the item to delete. - """ - try: - self.table.delete_item(Key={self.partition_key_name: partition_key, self.sort_key_name: sort_key}) - except ClientError as err: - logger.error( - "Couldn't delete item %s. %s: %s", partition_key, - err.response['Error']['Code'], err.response['Error']['Message']) - raise diff --git a/tests/test_hydrocron_database.py b/tests/test_hydrocron_database.py deleted file mode 100644 index 3da9366..0000000 --- a/tests/test_hydrocron_database.py +++ /dev/null @@ -1,182 +0,0 @@ -""" -============== -test_create_table.py -============== -Test creating a Hydrocron dynamodb table. - -Unit tests for creating tables and adding items to the Hydrocron Database. -Requires a local install of DynamoDB to be running. 
See https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/DynamoDBLocal.html - -""" -import pytest -import boto3 -import json -from decimal import Decimal -import geopandas as gpd - -from hydrocron_database import Hydrocron_DB - - -test_shapefile_path = 'hydrocron-db/tests/data/SWOT_L2_HR_RiverSP_Reach_548_011_NA_20230610T193337_20230610T193344_PIA1_01/SWOT_L2_HR_RiverSP_Reach_548_011_NA_20230610T193337_20230610T193344_PIA1_01.shp' -test_table_name = 'hydrocron_test_table' -test_partition_key_name = 'reach_id' -test_sort_key_name = 'time' - -def test_create_table(dynamo_instance): - ''' - Tests table creation function - ''' - - if dynamo_instance.table_exists(test_table_name): - dynamo_instance.delete_table(test_table_name) - - hydrocron_test_table = dynamo_instance.create_table(test_table_name, - partition_key=test_partition_key_name, partition_key_type='S', - sort_key=test_sort_key_name, sort_key_type='N') - else: - hydrocron_test_table = dynamo_instance.create_table(test_table_name, - partition_key='reach_id', partition_key_type='S', - sort_key='time', sort_key_type='N') - - assert dynamo_instance.table_exists(test_table_name) - assert hydrocron_test_table.table_name == test_table_name - - -def test_table_exists(dynamo_instance): - ''' - Test that a table exists in the database - ''' - - assert dynamo_instance.table_exists(test_table_name) - - -def test_list_tables(dynamo_instance): - ''' - Test listing tables that exist in database - ''' - - - if dynamo_instance.table_exists(test_table_name): - list_of_tables = dynamo_instance.list_tables() - - assert len(list_of_tables) > 0 - assert test_table_name in list_of_tables - - else: - assert len(list_of_tables) == 0 - -def test_add_data(dynamo_instance): - ''' - Test adding data from one Reach shapefile to db - ''' - - if dynamo_instance.table_exists(test_table_name): - dynamo_instance.delete_table(test_table_name) - - hydrocron_test_table = dynamo_instance.create_table(test_table_name, - 
partition_key='reach_id', partition_key_type='S', - sort_key='time', sort_key_type='N') - - # read shapefile into geopandas dataframe - shp_file = gpd.read_file(test_shapefile_path) - - item_attrs = {} - for index, row in shp_file.iterrows(): - # convert each reach into a dictionary of attributes that dynamo can read - item_attrs = json.loads(row.to_json(default_handler=str), parse_float=Decimal) - - # write to the table - hydrocron_test_table.add_data( **item_attrs) - - assert hydrocron_test_table.table.item_count == 687 - - -def test_query(dynamo_instance): - ''' - Test a query for a reach id - ''' - - if dynamo_instance.table_exists(test_table_name): - dynamo_instance.delete_table(test_table_name) - - hydrocron_test_table = dynamo_instance.create_table(test_table_name, - partition_key='reach_id', partition_key_type='S', - sort_key='time', sort_key_type='N') - - # read shapefile into geopandas dataframe - shp_file = gpd.read_file(test_shapefile_path) - - item_attrs = {} - for index, row in shp_file.iterrows(): - # convert each reach into a dictionary of attributes that dynamo can read - item_attrs = json.loads(row.to_json(default_handler=str), parse_float=Decimal) - - # write to the table - hydrocron_test_table.add_data( **item_attrs) - - - items = hydrocron_test_table.run_query(partition_key='71224100223') - - assert items[0]['wse'] == Decimal(str(286.2983)) - -def test_delete_item(dynamo_instance): - if dynamo_instance.table_exists(test_table_name): - dynamo_instance.delete_table(test_table_name) - - hydrocron_test_table = dynamo_instance.create_table(test_table_name, - partition_key='reach_id', partition_key_type='S', - sort_key='time', sort_key_type='N') - - # read shapefile into geopandas dataframe - shp_file = gpd.read_file(test_shapefile_path) - - item_attrs = {} - for index, row in shp_file.iterrows(): - # convert each reach into a dictionary of attributes that dynamo can read - item_attrs = json.loads(row.to_json(default_handler=str), 
parse_float=Decimal) - - # write to the table - hydrocron_test_table.add_data( **item_attrs) - - hydrocron_test_table.delete_item(partition_key='71224100203', sort_key=Decimal(-999999999999.000)) - assert hydrocron_test_table.table.item_count == 686 - -def test_delete_table(dynamo_instance): - ''' - Test delete table - ''' - - if dynamo_instance.table_exists(test_table_name): - dynamo_instance.delete_table(test_table_name) - else: - dynamo_instance.create_table(test_table_name, - partition_key='reach_id', partition_key_type='S', - sort_key='time', sort_key_type='N') - - dynamo_instance.delete_table() - - assert not dynamo_instance.table_exists(test_table_name) - - -@pytest.fixture(scope='session') -def dynamo_instance(): - ''' - Set up a boto3 resource connection to a local dynamodb instance. - Assumes Local DynamoDB instance installed and running. See https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/DynamoDBLocal.html - - Returns - ------- - dyndb_resource - A dynamodb local resource - ''' - - session = boto3.session.Session(aws_access_key_id='a', - aws_secret_access_key='a', - aws_session_token='fake_session_token', - region_name='us-west-2') - - dyndb_resource = session.resource('dynamodb', endpoint_url='http://localhost:8000') - - dynamo_instance = Hydrocron_DB(dyn_resource=dyndb_resource) - - return dynamo_instance