Skip to content

Commit

Permalink
feat: add a basic cli
Browse files Browse the repository at this point in the history
Also comes with config setting via env var
  • Loading branch information
jrdh committed Feb 7, 2024
1 parent 955d856 commit 47d680c
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 0 deletions.
Empty file added dataimporter/cli/__init__.py
Empty file.
78 changes: 78 additions & 0 deletions dataimporter/cli/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import code

import click

from dataimporter.cli.utils import with_config, console
from dataimporter.importer import DataImporter
from dataimporter.lib.config import Config


@click.group(name="dimp")
def cli():
    """
    The root CLI group for the dimp command.
    """
    # Intentionally empty: this function only acts as the parent group that the
    # subcommands in this module attach themselves to via @cli.command(). The
    # docstring above is what click displays as the group's help text.


@cli.command()
@with_config()
def emu(config: Config):
    """
    Processes the available EMu exports, queuing, ingesting, and indexing one day's
    worth of data at a time ensuring each day's data is represented by a new version.
    """
    # NOTE: the docstring above doubles as this command's help text in click.

    # the views which are updated, in this order, after each set of dumps is queued
    views = ("specimen", "indexlot", "artefact", "mss", "preparation")

    with DataImporter(config) as importer:
        while True:
            console.log("Queuing next dump set")
            queued = importer.queue_emu_changes(only_one=True)

            # an empty result means there was nothing left to queue, so we're done
            if not queued:
                console.log("No more dumps to import, done")
                break

            date_list = ", ".join(day.isoformat() for day in queued)
            console.log(f"Dates queued: {date_list}")

            # push the newly queued changes through to mongo and then elasticsearch,
            # one view at a time
            for view in views:
                console.log(f"Adding changes from {view} view to mongo")
                importer.add_to_mongo(view)
                console.log(f"Syncing changes from {view} view to elasticsearch")
                importer.sync_to_elasticsearch(view, parallel=True)
                console.log(f"Finished with {view}")


@cli.command()
@with_config()
def gbif(config: Config):
    """
    Requests a new download of our specimen dataset from GBIF, downloads this DwC-A, and
    queues any changes found in it, then ingests and indexes any changes that cascade
    from these GBIF records to their associated specimen records.
    """
    # NOTE: the docstring above doubles as this command's help text in click.

    # only the specimen view is ingested and indexed by this command
    view = "specimen"

    with DataImporter(config) as importer:
        # queue first, then ingest into mongo, then index into elasticsearch
        importer.queue_gbif_changes()
        importer.add_to_mongo(view)
        importer.sync_to_elasticsearch(view, parallel=True)


@cli.command()
@with_config()
def shell(config: Config):
    """
    Drops the caller into a Python shell with a DataImporter object (`importer`)
    instance available, thus allowing direct access to all methods.
    This is provided as purely a debugging tool.
    """
    # NOTE: the docstring above doubles as this command's help text in click.
    with DataImporter(config) as importer:
        console.print("Starting shell...")
        # the interactive session's namespace contains just the one name, `importer`;
        # the importer's context is only exited once the session ends
        code.interact(local={"importer": importer})


# allow this module to be run directly as a script, in which case the root click group
# is invoked
if __name__ == "__main__":
    cli()
66 changes: 66 additions & 0 deletions dataimporter/cli/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from functools import partial
from functools import partial
from pathlib import Path
from typing import Optional, Any

import click
from click import Parameter, Context
from rich.console import Console

from dataimporter.lib.config import Config, load, ConfigLoadError

# name of the environment variable which can hold the config path; it is passed as the
# `envvar` of the config click argument defined at the bottom of this module
CONFIG_ENV_VAR = "DIMP_CONFIG"

# single rich console instance shared by the CLI code for all logging and printing
console: Console = Console()


class ConfigType(click.Path):
    """
    Click parameter type which turns a path to a config file into a loaded Config
    object, so that CLI functions receive the config itself rather than its location.
    """

    name = "config"

    def __init__(self):
        # the value must be an existing, readable file (directories are rejected) and
        # the validated path is handed back to us as a pathlib.Path
        super().__init__(
            path_type=Path, exists=True, readable=True, file_okay=True, dir_okay=False
        )

    def convert(
        self, value: Any, param: Optional[Parameter], ctx: Optional[Context]
    ) -> Config:
        """
        Produce a Config object from the value click passes in.

        A value which is already a Config is returned untouched. Anything else is
        validated as a path by the parent click.Path type and then loaded. If loading
        fails for any reason, the failure is reported through click's fail mechanism.

        :param value: the value passed from Click, hopefully this is a path of some kind
        :param param: the parameter that is using this type to convert its value. May be
                      None.
        :param ctx: the current context that arrived at this value. May be None.
        :return: a config object
        """
        if isinstance(value, Config):
            return value

        validated = super().convert(value, param, ctx)
        path: Path = Path(validated)
        # both failure messages start the same way, only the stated cause differs
        prefix = f"Failed to load config from {path} due to "

        try:
            return load(path)
        except ConfigLoadError as e:
            # config load errors carry their own description of what went wrong
            self.fail(f"{prefix}{e.reason}", param, ctx)
        except Exception as e:
            # anything else is reported using the exception's string form
            self.fail(f"{prefix}{str(e)}", param, ctx)


# decorator factory which adds the "config" click argument to any click command
# function. Call it when decorating, i.e. @with_config(). The argument's value is
# converted by ConfigType and the CONFIG_ENV_VAR environment variable is set as the
# argument's envvar.
with_config = partial(
    click.argument,
    "config",
    type=ConfigType(),
    envvar=CONFIG_ENV_VAR,
)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dependencies = [
"msgpack==1.0.7",
"ciso8601==2.3.1",
"requests==2.31.0",
"rich==13.6.0",
]
[project.optional-dependencies]
test = [
Expand Down

0 comments on commit 47d680c

Please sign in to comment.