From b888cf2bd5ac7f0002ce5233b8dbde631500fa7a Mon Sep 17 00:00:00 2001 From: Stijn Van Hoey Date: Tue, 22 Aug 2023 00:40:59 +0200 Subject: [PATCH] Improve the documentation (#55) * Use sphinx-click integration using rst syntax * Fix CI setup * Fix #52 by adding example for local usage * Fix #51 by rendering docstring in readme using sphinx-click * Explain how to support new vpts-csv versions * Add sphinx-click rst source file * Fix formatting * Extend info on CLI endpoints * Update unit test * Update unit test --- .github/workflows/release.yml | 6 +- CONTRIBUTING.md | 13 +++++ README.md | 84 +++++++++++++++++++-------- docs/api/vptstools.bin.rst | 8 +++ docs/click.rst | 9 +++ docs/conf.py | 1 + setup.cfg | 1 + src/vptstools/bin/transfer_baltrad.py | 44 +++++++------- src/vptstools/bin/vph5_to_vpts.py | 25 ++++++-- src/vptstools/vpts.py | 8 ++- src/vptstools/vpts_csv.py | 2 +- tests/test_vph5_to_vpts.py | 2 +- 12 files changed, 145 insertions(+), 58 deletions(-) create mode 100644 docs/click.rst diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 90b1fd1..8fc9148 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -123,10 +123,10 @@ jobs: python -m pip install --upgrade pip python -m pip install -r requirements.txt python -m pip install '${{ needs.prepare.outputs.wheel-distribution }}' - python -m pip install pytest + python -m pip install pytest moto[s3] - name: Run unit tests on pinned dependencies run: >- - pytest -rFEx --durations 10 --color yes # pytest args + pytest -rFEx --no-cov --durations 10 --color yes # pytest args docker: needs: finalize @@ -135,6 +135,8 @@ jobs: - name: Retrieve pre-built distribution files uses: actions/download-artifact@v3 with: {name: python-distribution-files, path: dist/} + - uses: actions/checkout@v3 # get Dockerfile from repo + with: {fetch-depth: 0} # deep clone for setuptools-scm - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 - name: Configure AWS 
credentials diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ed2d322..e300402 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -239,6 +239,19 @@ for `vptstools` to pypi can be done manually as well with the following steps: 500KB), unwanted clutter may have been accidentally included. 4. Run `tox -e publish -- --repository pypi` and check that everything was uploaded to [PyPI] correctly. +(new-vptscsv-version)= +### Support a new version of the VPTS-CSV data exchange format + +To support a new version of the VPTS-CSV data exchange format, following adjustments in the {py:mod}`vptstools.vpts_csv` +module are required: + +- Create a new class `VptsCsvVX` which subclasses from the abstract class {py:class}`vptstools.vpts_csv.AbstractVptsCsv` +- Overwrite the abstract methods to define 'no data' representation, the 'Undetect' representation, the sorting logic + and the mapping of the individual fields from ODIM bird profile to the VPTS CSV data format. Check the + {py:class}`vptstools.vpts_csv.AbstractVptsCsv` documentation for more info. +- Link the string version ID (v1, v2,..) with the correct `AbstractVptsCsv` child class by extending the + {py:func}`vptstools.vpts_csv.get_vpts_version` with a new mapping from version string to class instance. 
+ [black]: https://pypi.org/project/black/ [commonmark]: https://commonmark.org/ diff --git a/README.md b/README.md index e70499b..1ef2014 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Project generated with PyScaffold](https://img.shields.io/badge/-PyScaffold-005CA0?logo=pyscaffold)](https://pyscaffold.org/) [![PyPI-Server](https://img.shields.io/pypi/v/vptstools.svg)](https://pypi.org/project/vptstools/) -[![.github/workflows/run_tests.yaml](https://github.com/enram/vptstools/actions/workflows/release.yml/badge.svg)](https://github.com/enram/vptstools/actions/workflows/release.yml) +[![.github/workflows/release.yml](https://github.com/enram/vptstools/actions/workflows/release.yml/badge.svg)](https://github.com/enram/vptstools/actions/workflows/release.yml) vptstools is a Python library to transfer and convert vpts data. VPTS (vertical profile time series) express the density, speed and direction of biological signals such as birds, bats and insects within a weather radar volume, @@ -22,39 +22,75 @@ If you need the tools/services to transfer data (SFTP, S3) install these additio pip install vptstools\[transfer\] ``` -## CLI endpoints +## Usage -In addition to using functions in Python scripts, two vptstools functions can be called from the command line: +As a library user interested in working with ODIM h5 and vpts files, the most important functions provided by the +package are {py:func}`vptstools.vpts.vp`, {py:func}`vptstools.vpts.vpts` and {py:func}`vptstools.vpts.vpts_to_csv`, +which can be used respectively to convert a single `h5` file, a set of `h5` files and save a `vpts` DataFrame +to a csv-file: -### transfer_baltrad +- Convert a single local ODIM h5 file to a vp DataFrame: -CLI tool to move files from the Baltrad FTP server to an S3 bucket. 
+```python +from vptstools.vpts import vp -```shell -transfer_baltrad +file_path_h5 = "./NLDBL_vp_20080215T0010_NL50_v0-3-20.h5" +df_vp = vp(file_path_h5) ``` -Configuration is loaded from environmental variables: +- Convert a set of locally stored ODIM h5 files to a vpts DataFrame: -- FTP_HOST: Baltrad FTP host ip address -- FTP_PORT: Baltrad FTP host port -- FTP_USERNAME: Baltrad FTP user name -- FTP_PWD: Baltrad FTP password -- FTP_DATADIR: Baltrad FTP directory to load data files from -- DESTINATION_BUCKET: AWS S3 bucket to write data to -- SNS_TOPIC: AWS SNS topic to report when routine fails -- AWS_PROFILE: AWS profile (mainly for local development) +```python +from pathlib import Path +from vptstools.vpts import vpts -### vph5_to_vpts +file_paths = sorted(Path("./data").rglob("*.h5")) # Get all h5 files within the data directory +df_vpts = vpts(file_paths) +``` -CLI tool to aggregate/convert the [ODIM hdf5 bird profile](https://github.com/adokter/vol2bird/wiki/ODIM-bird-profile-format-specification) -files available on the aloft S3 bucket (as generated by [vol2bird](https://github.com/adokter/vol2bird)) to -daily and monthly aggregates following the [VPTS CSV file specification](https://github.com/enram/vpts-csv). +- Store a `vp` or `vpts` DataFrame to a [VPTS CSV](https://aloftdata.eu/vpts-csv/) file: -The CLI checks the modified date of the uploaded ODIM hdf5 files and applies the aggregation/conversion for the files modified within the defined time window: +```python +from vptstools.vpts import vpts_to_csv -```shell -vph5_to_vpts --modified-days-ago=1 +vpts_to_csv(df_vpts, "vpts.csv") +``` + +```{note} +Both {py:func}`vptstools.vpts.vp` and {py:func}`vptstools.vpts.vpts` have 2 other optional parameters related to the +[VPTS-CSV data exchange format](https://aloftdata.eu/vpts-csv/). 
The `vpts_csv_version` parameter defines the version of the +[VPTS-CSV data exchange standard](https://aloftdata.eu/vpts-csv/) (default v1) whereas the `source_file` provides a way to define +a custom [source_file](https://aloftdata.eu/vpts-csv/#source_file) field to reference the source from which the +data were derived. +``` + +To validate a vpts DataFrame against the frictionless data schema as defined by the VPTS-CSV data exchange +format and return a report, use the {py:func}`vptstools.vpts.validate_vpts`: + +```python +from vptstools.vpts import validate_vpts + +report = validate_vpts(df_vpts, version="v1") +report.stats["errors"] +``` + +Other modules in the package are: + +- {py:mod}`vptstools.odimh5`: This module extents the implementation of the original + [odimh5 package](https://pypi.org/project/odimh5/) which is now deprecated. +- {py:mod}`vptstools.vpts_csv`: This module contains - for each version of the VPTS-CSV exchange format - the + corresponding implementation which can be used to generate a `vp` or `vpts` DataFrame. For more information on how to + support a new version of the VPTS-CSV format, see [contributing docs](#new-vptscsv-version). +- {py:mod}`vptstools.s3`: This module contains the functions to manage the + aloft data repository](https://aloftdata.eu/browse/) S3 Bucket. + +## CLI endpoints + +In addition to using functions in Python scripts, two vptstools routines are available to be called from the command line +after installing the package: + +```{eval-rst} +.. 
include:: click.rst ``` ## Development instructions @@ -85,7 +121,7 @@ tox -av # List all available tasks To create a pinned `requirements.txt` set of dependencies, [pip-tools](https://github.com/jazzband/pip-tools) is used: -```commandline +```bash pip-compile --extra transfer --resolver=backtracking` ``` diff --git a/docs/api/vptstools.bin.rst b/docs/api/vptstools.bin.rst index 3dde9b5..f862b6b 100644 --- a/docs/api/vptstools.bin.rst +++ b/docs/api/vptstools.bin.rst @@ -6,6 +6,14 @@ vptstools.bin namespace Submodules ---------- +vptstools.bin.click\_exception module +------------------------------------- + +.. automodule:: vptstools.bin.click_exception + :members: + :undoc-members: + :show-inheritance: + vptstools.bin.transfer\_baltrad module -------------------------------------- diff --git a/docs/click.rst b/docs/click.rst new file mode 100644 index 0000000..ff636db --- /dev/null +++ b/docs/click.rst @@ -0,0 +1,9 @@ + + + +.. click:: vptstools.bin.transfer_baltrad:cli + :prog: transfer_baltrad + + +.. click:: vptstools.bin.vph5_to_vpts:cli + :prog: vph5_to_vpts \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 0bfcbc9..ecd76a7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -72,6 +72,7 @@ "sphinx.ext.ifconfig", "sphinx.ext.mathjax", "sphinx.ext.napoleon", + "sphinx_click" ] # Add any paths that contain templates here, relative to this directory. 
diff --git a/setup.cfg b/setup.cfg index 94e9392..c327077 100644 --- a/setup.cfg +++ b/setup.cfg @@ -75,6 +75,7 @@ develop = black sphinx sphinx_rtd_theme + sphinx-click myst-parser[linkify] moto[s3] tox diff --git a/src/vptstools/bin/transfer_baltrad.py b/src/vptstools/bin/transfer_baltrad.py index 13d3362..8cbce41 100644 --- a/src/vptstools/bin/transfer_baltrad.py +++ b/src/vptstools/bin/transfer_baltrad.py @@ -1,23 +1,3 @@ -""" -Python CLI script that: -- Connects via SFTP to the BALTRAD server -- For each vp file (pvol gets ignored), download the file from the server and - upload it to the "aloft" S3 bucket -- If file already exists at destination => do nothing - -Designed to be executed daily via a simple scheduled job like cron (files disappear after a few -days on the BALTRAD server) - -Configuration is loaded from environmental variables: -- FTP_HOST: Baltrad FTP host ip address -- FTP_PORT: Baltrad FTP host port -- FTP_USERNAME: Baltrad FTP user name -- FTP_PWD: Baltrad FTP password -- FTP_DATADIR: Baltrad FTP directory to load data files from -- DESTINATION_BUCKET: AWS S3 bucket to write data to -- SNS_TOPIC: AWS SNS topic to report when routine fails -- AWS_PROFILE: AWS profile (mainly for local development) -""" import datetime import os from functools import partial @@ -68,8 +48,7 @@ def s3_key_exists(key: str, bucket: str, s3_client) -> bool: def extract_metadata_from_filename(filename: str) -> tuple: - """Extract the metadata from the filename (format - such as 'fropo_vp_20220809T051000Z_0xb') + """Extract the metadata from the filename (format such as 'fropo_vp_20220809T051000Z_0xb') All returned values are strings, month and days are 0-prefixed if they are single-digit. @@ -92,6 +71,27 @@ def extract_metadata_from_filename(filename: str) -> tuple: @click.command(cls=catch_all_exceptions(click.Command, handler=report_sns)) # Add SNS-reporting to exception def cli(): + """Sync files from Baltrad FTP server to the aloft s3 bucket. 
This function connects via SFTP to the BALTRAD server, downloads the available ``vp`` files (``pvol`` gets ignored) + from the FTP server and uploads the h5 file
+ """Convert and aggregate h5 vp files to daily and monthly vpts-csv files on S3 bucket + + Check the latest modified + `ODIM h5 bird vp profile `_ on the + aloft S3 bucket (as generated by `vol2bird `_ and transferred using the + :py:mod:`vpts.bin.transfer_baltrad` CLI routine). Using an + `s3 inventory bucket `_, check which + h5 files were recently added and convert those files from ODIM bird profile to the + `VPTS-CSV format `_. Finally, upload the generated daily/monthly vpts files to S3. + + Besides, while scanning the s3 inventory to define the files to convert, + the CLI routine creates the ``coverage.csv`` file and uploads it to the bucket. + + Configuration is loaded from the following environmental variables: + + - ``S3_BUCKET``: AWS S3 bucket to read and write data to + - ``INVENTORY_BUCKET``: AWS S3 bucket configured as `s3 inventory bucket `_ for the S3_BUCKET. + - ``SNS_TOPIC``: AWS SNS topic to report when routine fails + - ``AWS_REGION``: AWS region where the SNS alerting is defined + - ``AWS_PROFILE``: AWS profile (mainly useful for local development when working with multiple AWS profiles) """ if AWS_PROFILE: storage_options = {"profile": AWS_PROFILE} diff --git a/src/vptstools/vpts.py b/src/vptstools/vpts.py index c054fa6..7fc5e83 100644 --- a/src/vptstools/vpts.py +++ b/src/vptstools/vpts.py @@ -188,15 +188,17 @@ def vp(file_path, vpts_csv_version="v1.0", source_file=""): Ruleset with the VPTS CSV ruleset to use, e.g. v1.0 source_file : str | callable URL or path to the source file from which the data were derived or - a callable that converts the file_path to the source_file + a callable that converts the file_path to the source_file. See + https://aloftdata.eu/vpts-csv/#source_file for more information on + the source file field. + Examples -------- >>> file_path = Path("bejab_vp_20221111T233000Z_0x9.h5") >>> vp(file_path) >>> vp(file_path, - ... source_file="s3://aloft/baltrad/hdf5/2022/11/11/" \ - ... 
"bejab_vp_20221111T233000Z_0x9.h5") + ... source_file="s3://aloft/baltrad/hdf5/2022/11/11/bejab_vp_20221111T233000Z_0x9.h5") #noqa Use file name itself as source_file representation in vp file using a custom callable function diff --git a/src/vptstools/vpts_csv.py b/src/vptstools/vpts_csv.py index 3721ccc..7c35c37 100644 --- a/src/vptstools/vpts_csv.py +++ b/src/vptstools/vpts_csv.py @@ -110,7 +110,7 @@ def check_source_file(source_file, regex): Raises ------ - ValueError : : source_file not according to regex + ValueError : source_file not according to regex Examples -------- diff --git a/tests/test_vph5_to_vpts.py b/tests/test_vph5_to_vpts.py index 0bb219c..b5a01a3 100644 --- a/tests/test_vph5_to_vpts.py +++ b/tests/test_vph5_to_vpts.py @@ -12,7 +12,7 @@ def test_help(): result = runner.invoke(cli, ["--help"]) assert result.exit_code == 0 assert ( - "Convert and aggregate h5 vp files to daily/monthly vpts files on S3 bucket" + "Convert and aggregate h5 vp files to daily and monthly vpts-csv files" in result.output )