From d80f17d96f2302609913f55763052efa55e0fb59 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 8 Dec 2023 09:41:20 -0800 Subject: [PATCH 01/15] unpin and update pre-commit hooks --- .pre-commit-config.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 00ba6cdf..6f141f9d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ default_language_version: exclude: "^src/atomate2/vasp/schemas/calc_types/" repos: - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.1.3 + rev: v0.1.7 hooks: - id: ruff args: [--fix] @@ -23,18 +23,18 @@ repos: additional_dependencies: [black] exclude: ^(README.md|paper/paper.md)$ - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 entry: pflake8 files: ^src/ additional_dependencies: - - pyproject-flake8==6.0.0 - - flake8-bugbear==22.12.6 - - flake8-typing-imports==1.14.0 - - flake8-docstrings==1.6.0 - - flake8-rst-docstrings==0.3.0 - - flake8-rst==0.8.0 + - pyproject-flake8 + - flake8-bugbear + - flake8-typing-imports + - flake8-docstrings + - flake8-rst-docstrings + - flake8-rst - repo: https://github.com/pre-commit/pygrep-hooks rev: v1.10.0 hooks: @@ -43,7 +43,7 @@ repos: - id: rst-directive-colons - id: rst-inline-touching-normal - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.6.1 + rev: v1.7.1 hooks: - id: mypy files: ^src/ From 3f607e199e03349c2ea51a5cbb48a32e6a5eadf6 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 8 Dec 2023 09:41:30 -0800 Subject: [PATCH 02/15] add explicit stacklevel to warnings (fixes flake8 B028) --- src/jobflow/core/flow.py | 3 ++- src/jobflow/core/job.py | 3 ++- src/jobflow/core/store.py | 4 ++-- src/jobflow/managers/local.py | 2 +- src/jobflow/settings.py | 3 ++- src/jobflow/utils/enum.py | 2 +- src/jobflow/utils/graph.py | 4 +++- 7 files changed, 13 insertions(+), 8 deletions(-) diff --git 
a/src/jobflow/core/flow.py b/src/jobflow/core/flow.py index 7f9e911a..9ac5ba8a 100644 --- a/src/jobflow/core/flow.py +++ b/src/jobflow/core/flow.py @@ -274,7 +274,8 @@ def output(self, output: Any): f"Flow '{self.name}' contains a Flow or Job as an output. " f"Usually the Flow output should be the output of a Job or " f"another Flow (e.g. job.output). If this message is " - f"unexpected then double check the outputs of your Flow." + f"unexpected then double check the outputs of your Flow.", + stacklevel=2, ) # check if the jobs array contains all jobs needed for the references diff --git a/src/jobflow/core/job.py b/src/jobflow/core/job.py index f11e1917..3f2f64e6 100644 --- a/src/jobflow/core/job.py +++ b/src/jobflow/core/job.py @@ -359,7 +359,8 @@ def __init__( f"Job '{self.name}' contains an Flow or Job as an input. " f"Usually inputs should be the output of a Job or an Flow (e.g. " f"job.output). If this message is unexpected then double check the " - f"inputs to your Job." + f"inputs to your Job.", + stacklevel=2, ) def __repr__(self): diff --git a/src/jobflow/core/store.py b/src/jobflow/core/store.py index ebbc3de0..e5707169 100644 --- a/src/jobflow/core/store.py +++ b/src/jobflow/core/store.py @@ -282,7 +282,7 @@ def update( from jobflow.utils.find import find_key, update_in_dictionary - if save is None or save is True: + if save in (None, True): save = self.save save_keys = _prepare_save(save) @@ -766,7 +766,7 @@ def _group_blobs(infos, locs): new_locations = [] for store_load in load.values(): for blob, location in zip(blob_infos, locations): - if store_load is True: + if store_load: new_blobs.append(blob) new_locations.append(location) elif isinstance(store_load, bool): diff --git a/src/jobflow/managers/local.py b/src/jobflow/managers/local.py index 5c1fb9fb..1b09b186 100644 --- a/src/jobflow/managers/local.py +++ b/src/jobflow/managers/local.py @@ -156,7 +156,7 @@ def _run(root_flow): response, jobflow_stopped = _run_job(job, parents) 
encountered_bad_response = encountered_bad_response or response is None - if jobflow_stopped is True: + if jobflow_stopped: return False return not encountered_bad_response diff --git a/src/jobflow/settings.py b/src/jobflow/settings.py index d2a7e890..8cb09153 100644 --- a/src/jobflow/settings.py +++ b/src/jobflow/settings.py @@ -137,7 +137,8 @@ def load_default_settings(cls, values): if Path(config_file_path).exists(): if Path(config_file_path).stat().st_size == 0: warnings.warn( - f"An empty JobFlow config file was located at {config_file_path}" + f"An empty JobFlow config file was located at {config_file_path}", + stacklevel=2, ) else: try: diff --git a/src/jobflow/utils/enum.py b/src/jobflow/utils/enum.py index 8e7e6c21..efdd7639 100644 --- a/src/jobflow/utils/enum.py +++ b/src/jobflow/utils/enum.py @@ -12,7 +12,7 @@ def __str__(self): def __eq__(self, other): """Compare to another enum for equality.""" - if type(self) == type(other) and self.value == other.value: + if type(self) is type(other) and self.value == other.value: return True return str(self.value) == str(other) diff --git a/src/jobflow/utils/graph.py b/src/jobflow/utils/graph.py index b253c950..b289e625 100644 --- a/src/jobflow/utils/graph.py +++ b/src/jobflow/utils/graph.py @@ -46,7 +46,9 @@ def itergraph(graph: nx.DiGraph): subgraphs = [graph.subgraph(c) for c in nx.weakly_connected_components(graph)] if len(subgraphs) > 1: - warnings.warn("Some jobs are not connected, their ordering may be random") + warnings.warn( + "Some jobs are not connected, their ordering may be random", stacklevel=2 + ) for subgraph in subgraphs: yield from nx.topological_sort(subgraph) From d3c690da0d711cc213f943994faee18a4e93227c Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 8 Dec 2023 09:43:57 -0800 Subject: [PATCH 03/15] fix dead dynamic wf doc link reported in https://github.com/openjournals/joss-reviews/issues/5995#issuecomment-1847542926 --- docs/tutorials/3-defining-jobs.ipynb | 4 ++-- 1 file changed, 
2 insertions(+), 2 deletions(-) diff --git a/docs/tutorials/3-defining-jobs.ipynb b/docs/tutorials/3-defining-jobs.ipynb index d36731cb..1e4ddef5 100644 --- a/docs/tutorials/3-defining-jobs.ipynb +++ b/docs/tutorials/3-defining-jobs.ipynb @@ -103,7 +103,7 @@ "id": "fatal-bible", "metadata": {}, "source": [ - "Jobs also have an index. This tracks the number of times the job has been \"replaced\" (replacing is covered in detail in the [Dynamic and nested Flows tutorial](dynamic-flows)).\n" + "Jobs also have an index. This tracks the number of times the job has been \"replaced\" (replacing is covered in detail in the [Dynamic and nested Flows tutorial](5-dynamic-flows.html)).\n" ] }, { @@ -233,7 +233,7 @@ "source": [ "from jobflow.managers.local import run_locally\n", "\n", - "response = run_locally(add(1,2))" + "response = run_locally(add(1, 2))" ] }, { From f00a5cceed44c254b78bd1891a18397fb864d47a Mon Sep 17 00:00:00 2001 From: Matthew Evans <7916000+ml-evs@users.noreply.github.com> Date: Fri, 8 Dec 2023 18:30:24 +0000 Subject: [PATCH 04/15] Fix tutorial links --- docs/tutorials.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials.rst b/docs/tutorials.rst index 0ba33463..ef546080 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -14,4 +14,4 @@ Tutorials tutorials/8-fireworks .. Note:: - [@jageo](https://github.com/JaGeo) also has a set of [Jobflow tutorials](https://jageo.github.io/Advanced_Jobflow_Tutorial/intro.html) written within the context of computational materials science applications, which you may wish to check out after exploring the basics here. + `@jageo <https://github.com/JaGeo>`_ also has a set of `Jobflow tutorials <https://jageo.github.io/Advanced_Jobflow_Tutorial/intro.html>`_ written within the context of computational materials science applications, which you may wish to check out after exploring the basics here.
From 0e820fd122b97523674bb2d92f5a5ba9ec06d4d5 Mon Sep 17 00:00:00 2001 From: Max Gallant Date: Fri, 8 Dec 2023 11:10:46 -0800 Subject: [PATCH 05/15] Fix tutorial bug --- docs/tutorials/2-introduction.ipynb | 54 ++++++++++++++++------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/docs/tutorials/2-introduction.ipynb b/docs/tutorials/2-introduction.ipynb index d2a95426..01d9af8e 100644 --- a/docs/tutorials/2-introduction.ipynb +++ b/docs/tutorials/2-introduction.ipynb @@ -103,7 +103,7 @@ { "data": { "text/plain": [ - "OutputReference(76f9fef1-e2c7-4ad7-b090-b9153866c582)" + "OutputReference(aa2a6b1a-4846-4154-94b9-4296b2a64e5d)" ] }, "execution_count": 4, @@ -153,18 +153,24 @@ "source": [ "## Creating Flows\n", "\n", - "A `Flow` is a collection of `Job`s or other `Flow` objects. Flows are the primary tool for defining workflows in jobflow. Let's create a Flow from the jobs we just made:\n" + "A `Flow` is a collection of `Job`s or other `Flow` objects. Flows are the primary tool for defining workflows in jobflow. Let's create a Flow from the jobs we just made. We will repeat the lines we used to create them here for clarity.\n" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 15, "id": "danish-indonesia", "metadata": {}, "outputs": [], "source": [ "from jobflow import Flow\n", "\n", + "time_github = time_website(\"https://www.github.com\")\n", + "time_google = time_website(\"https://www.google.com\")\n", + "time_nyt = time_website(\"https://www.nytimes.com\")\n", + "\n", + "sum_times = sum_numbers([time_github.output, time_google.output, time_nyt.output])\n", + "\n", "flow = Flow([time_github, time_google, time_nyt, sum_times])" ] }, @@ -186,9 +192,9 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -327,16 +333,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "2023-06-08 10:05:39,732 INFO Started executing jobs locally\n", - "2023-06-08 10:05:39,737 INFO Starting job - time_website (76f9fef1-e2c7-4ad7-b090-b9153866c582)\n", - "2023-06-08 10:05:40,126 INFO Finished job - time_website (76f9fef1-e2c7-4ad7-b090-b9153866c582)\n", - "2023-06-08 10:05:40,128 INFO Starting job - time_website (f658ee45-b6e8-4078-98fd-9ab33a0c5747)\n", - "2023-06-08 10:05:40,306 INFO Finished job - time_website (f658ee45-b6e8-4078-98fd-9ab33a0c5747)\n", - "2023-06-08 10:05:40,307 INFO Starting job - time_website (3cd950be-b7f6-4991-84a2-420593dbe75c)\n", - "2023-06-08 10:05:40,792 INFO Finished job - time_website (3cd950be-b7f6-4991-84a2-420593dbe75c)\n", - "2023-06-08 10:05:40,793 INFO Starting job - sum_numbers (11b7679d-9d07-45b0-8c7b-f0bf42527ef0)\n", - "2023-06-08 10:05:40,798 INFO Finished job - sum_numbers (11b7679d-9d07-45b0-8c7b-f0bf42527ef0)\n", - "2023-06-08 10:05:40,799 INFO Finished executing jobs locally\n" + "2023-12-08 11:15:18,266 INFO Started executing jobs locally\n", + "2023-12-08 11:15:18,270 INFO Starting job - time_website (f7831f60-617e-490d-8854-ef3e25a78504)\n", + "2023-12-08 11:15:18,873 INFO Finished job - time_website (f7831f60-617e-490d-8854-ef3e25a78504)\n", + "2023-12-08 11:15:18,875 INFO Starting job - time_website (993990cd-872e-4c8f-823b-ac4eea8756c6)\n", + "2023-12-08 11:15:19,300 INFO Finished job - time_website (993990cd-872e-4c8f-823b-ac4eea8756c6)\n", + "2023-12-08 11:15:19,304 INFO Starting job - time_website (3c2d3abd-be33-4857-9943-6526bfb05804)\n", + "2023-12-08 11:15:19,774 INFO Finished job - time_website (3c2d3abd-be33-4857-9943-6526bfb05804)\n", + "2023-12-08 11:15:19,777 INFO Starting job - sum_numbers (2be44e87-0918-481f-ae77-7a5931074e1e)\n", + "2023-12-08 11:15:19,783 INFO Finished job - sum_numbers (2be44e87-0918-481f-ae77-7a5931074e1e)\n", + "2023-12-08 11:15:19,785 INFO Finished executing jobs 
locally\n" ] } ], @@ -404,10 +410,10 @@ { "data": { "text/plain": [ - "{'76f9fef1-e2c7-4ad7-b090-b9153866c582': {1: Response(output=0.07634975000000033, detour=None, addition=None, replace=None, stored_data=None, stop_children=False, stop_jobflow=False)},\n", - " 'f658ee45-b6e8-4078-98fd-9ab33a0c5747': {1: Response(output=0.0056510840000001394, detour=None, addition=None, replace=None, stored_data=None, stop_children=False, stop_jobflow=False)},\n", - " '3cd950be-b7f6-4991-84a2-420593dbe75c': {1: Response(output=0.15688537499999988, detour=None, addition=None, replace=None, stored_data=None, stop_children=False, stop_jobflow=False)},\n", - " '11b7679d-9d07-45b0-8c7b-f0bf42527ef0': {1: Response(output=0.23888620900000035, detour=None, addition=None, replace=None, stored_data=None, stop_children=False, stop_jobflow=False)}}" + "{'f7831f60-617e-490d-8854-ef3e25a78504': {1: Response(output=0.20485366717912257, detour=None, addition=None, replace=None, stored_data=None, stop_children=False, stop_jobflow=False)},\n", + " '993990cd-872e-4c8f-823b-ac4eea8756c6': {1: Response(output=0.06472958298400044, detour=None, addition=None, replace=None, stored_data=None, stop_children=False, stop_jobflow=False)},\n", + " '3c2d3abd-be33-4857-9943-6526bfb05804': {1: Response(output=0.18873387505300343, detour=None, addition=None, replace=None, stored_data=None, stop_children=False, stop_jobflow=False)},\n", + " '2be44e87-0918-481f-ae77-7a5931074e1e': {1: Response(output=0.45831712521612644, detour=None, addition=None, replace=None, stored_data=None, stop_children=False, stop_jobflow=False)}}" ] }, "execution_count": 11, @@ -441,7 +447,7 @@ { "data": { "text/plain": [ - "0.07634975000000033" + "0.20485366717912257" ] }, "execution_count": 12, @@ -477,7 +483,7 @@ { "data": { "text/plain": [ - "0.07634975000000033" + "0.20485366717912257" ] }, "execution_count": 13, @@ -502,9 +508,9 @@ ], "metadata": { "kernelspec": { - "display_name": "atomate2", + "display_name": "jobflow-dev", 
"language": "python", - "name": "atomate2" + "name": "jobflow-dev" }, "language_info": { "codemirror_mode": { @@ -516,7 +522,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.10.12" } }, "nbformat": 4, From 090678827651ad5690abb5869b5ebd8235dc0764 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 8 Dec 2023 09:47:13 -0800 Subject: [PATCH 06/15] ruff auto-remove needless lambdas --- src/jobflow/core/store.py | 4 ++-- src/jobflow/settings.py | 2 +- src/jobflow/utils/find.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/jobflow/core/store.py b/src/jobflow/core/store.py index e5707169..8899b78b 100644 --- a/src/jobflow/core/store.py +++ b/src/jobflow/core/store.py @@ -282,7 +282,7 @@ def update( from jobflow.utils.find import find_key, update_in_dictionary - if save in (None, True): + if save in {None, True}: save = self.save save_keys = _prepare_save(save) @@ -766,7 +766,7 @@ def _group_blobs(infos, locs): new_locations = [] for store_load in load.values(): for blob, location in zip(blob_infos, locations): - if store_load: + if store_load is True: new_blobs.append(blob) new_locations.append(location) elif isinstance(store_load, bool): diff --git a/src/jobflow/settings.py b/src/jobflow/settings.py index 8cb09153..620914e1 100644 --- a/src/jobflow/settings.py +++ b/src/jobflow/settings.py @@ -107,7 +107,7 @@ class JobflowSettings(BaseSettings): JOB_STORE: JobStore = Field( default_factory=lambda: JobStore( MemoryStore(), - additional_stores=defaultdict(lambda: _default_additional_store()), + additional_stores=defaultdict(_default_additional_store), ), description="Default JobStore to use when running locally or using FireWorks. 
" "See the :obj:`JobflowSettings` docstring for more details on the " diff --git a/src/jobflow/utils/find.py b/src/jobflow/utils/find.py index 43e6903d..5dfc98e9 100644 --- a/src/jobflow/utils/find.py +++ b/src/jobflow/utils/find.py @@ -233,7 +233,7 @@ def get_root_locations(locations): >>> _get_root_locations([["a", "b"], ["a"], ["c", "d"]]) [["a"], ["c", "d"]] """ - sorted_locs = sorted(locations, key=lambda x: len(x)) + sorted_locs = sorted(locations, key=len) root_locations = [] for loc in sorted_locs: if any(loc[: len(rloc)] == rloc for rloc in root_locations): From 7666ca6dea6bd7c732e08c10293b8fa08cc9ea90 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Fri, 8 Dec 2023 09:53:19 -0800 Subject: [PATCH 07/15] CI check for dead links in jupyter notebooks and markdown docs --- .github/workflows/link-check.yml | 29 +++++++++++++++++++++++++++++ paper/paper.md | 2 +- src/jobflow/core/store.py | 2 +- 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/link-check.yml diff --git a/.github/workflows/link-check.yml b/.github/workflows/link-check.yml new file mode 100644 index 00000000..dd12e443 --- /dev/null +++ b/.github/workflows/link-check.yml @@ -0,0 +1,29 @@ +name: Check Links + +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_dispatch: + +jobs: + check_links: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + pip install pytest-check-links nbconvert + + - name: Run link check + run: | + pytest --check-links **/**/*.md **/**/*.ipynb --check-links-ignore "https://www.gauss-centre.eu" diff --git a/paper/paper.md b/paper/paper.md index fb46383c..844062d9 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -206,6 +206,6 @@ Naturally, the summary presented in this article constitutes only a small subset # Acknowledgements -This work 
was primarily funded and intellectually led by the Materials Project, which is funded by the U.S. Department of Energy, Office of Science, Office of Basic Energy Sciences, Materials Sciences and Engineering Division, under Contract no. DE-AC02-05-CH11231: Materials Project program KC23MP. A.S.R. acknowledges support via a Miller Research Fellowship from the Miller Institute for Basic Research in Science, University of California, Berkeley. J.G would like to acknowledge the Gauss Centre for Supercomputing e.V. ([www.gauss-centre.eu](http://www.gauss-centre.eu/)) for funding workflow-related developments by providing generous computing time on the GCS Supercomputer SuperMUC-NG at Leibniz Supercomputing Centre ([www.lrz.de](http://www.lrz.de/)) (Project pn73da). J.R. acknowledges support from the German Academic Scholarship Foundation (Studienstiftung). M.L.E. thanks the BEWARE scheme of the Wallonia-Brussels Federation for funding under the European Commission's Marie Curie-Skłodowska Action (COFUND 847587). G.P. and D.W. acknowledge Umicore for the financial support in developing the remote execution mode of jobflow. D.W. and G.M.R. acknowledge funding from the European Union’s Horizon 2020 research and innovation program under the grant agreement No 951786 (NOMAD CoE). A.M.G. is supported by EPSRC Fellowship EP/T033231/1. +This work was primarily funded and intellectually led by the Materials Project, which is funded by the U.S. Department of Energy, Office of Science, Office of Basic Energy Sciences, Materials Sciences and Engineering Division, under Contract no. DE-AC02-05-CH11231: Materials Project program KC23MP. A.S.R. acknowledges support via a Miller Research Fellowship from the Miller Institute for Basic Research in Science, University of California, Berkeley. J.G would like to acknowledge the Gauss Centre for Supercomputing e.V. 
() for funding workflow-related developments by providing generous computing time on the GCS Supercomputer SuperMUC-NG at Leibniz Supercomputing Centre ([www.lrz.de](http://www.lrz.de/)) (Project pn73da). J.R. acknowledges support from the German Academic Scholarship Foundation (Studienstiftung). M.L.E. thanks the BEWARE scheme of the Wallonia-Brussels Federation for funding under the European Commission's Marie Curie-Skłodowska Action (COFUND 847587). G.P. and D.W. acknowledge Umicore for the financial support in developing the remote execution mode of jobflow. D.W. and G.M.R. acknowledge funding from the European Union’s Horizon 2020 research and innovation program under the grant agreement No 951786 (NOMAD CoE). A.M.G. is supported by EPSRC Fellowship EP/T033231/1. # References diff --git a/src/jobflow/core/store.py b/src/jobflow/core/store.py index 8899b78b..62143d4a 100644 --- a/src/jobflow/core/store.py +++ b/src/jobflow/core/store.py @@ -282,7 +282,7 @@ def update( from jobflow.utils.find import find_key, update_in_dictionary - if save in {None, True}: + if save in (None, True): save = self.save save_keys = _prepare_save(save) From 8384d30e6d56630ef1c4aa89e9d1cccf31c9389d Mon Sep 17 00:00:00 2001 From: Andrew-S-Rosen Date: Fri, 8 Dec 2023 12:22:15 -0800 Subject: [PATCH 08/15] Add store documentation --- docs/index.rst | 1 + docs/stores.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 docs/stores.md diff --git a/docs/index.rst b/docs/index.rst index 77edb054..2e5d92a0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,6 +6,7 @@ Install jobflow Install FireWorks (Optional) Tutorials + Configuring Data Stores .. 
toctree:: :caption: Information diff --git a/docs/stores.md b/docs/stores.md new file mode 100644 index 00000000..ceba9978 --- /dev/null +++ b/docs/stores.md @@ -0,0 +1,53 @@ +# Stores + +## Overview + +Jobflow relies on the [maggma package](https://github.com/materialsproject/maggma) to provide a unified interface to a variety of data stores. By default, all calculations are run using a `MemoryStore`, which persists solely in the current process' memory. In production calculations, one will generally want to use a persistent data store, such as a MongoDB database. This also allows one to run calculations in a distributed manner with a common data store. + +For a list of all available data stores, refer to the [maggma documentation](https://materialsproject.github.io/maggma/getting_started/stores/#list-of-stores). Here, we will go over how to use Jobflow with MongoDB via a [`MongoStore`](https://materialsproject.github.io/maggma/reference/stores/#maggma.stores.mongolike.MongoStore). + +## Configuring a `MongoStore` + +### Creating a `jobflow.yaml` File + +To modify basic Jobflow settings, you will first need to make a `jobflow.yaml` file if you haven't done so already. You will then need to define a `JOBFLOW_CONFIG_FILE` environment variable pointing to the file you made. For instance, in your `~/.bashrc` file, add the following line: + +```bash +export JOBFLOW_CONFIG_FILE="/path/to/my/jobflow.yaml" +``` + +If this environment variable is not specified, Jobflow will look for the file in `~/.jobflow.yaml`. + +### Basic Configuration + +In your `jobflow.yaml` copy the example below and fill in the fields with the appropriate values for a MongoDB store.
+ +```yaml title="jobflow.yaml" +JOB_STORE: + docs_store: + type: MongoStore + host: + port: 27017 + username: + password: + database: + collection_name: +``` + +### MongoDB Atlas + +If you are using a URI (as is common with MongoDB Atlas), then you will instead have a `jobflow.yaml` file that looks like the example below. Here, you will put the full URI in the `host` field. The `username` and `password` are part of the URI and so should not be included elsewhere in the YAML file. + +```yaml title="jobflow.yaml" +JOB_STORE: + docs_store: + type: MongoStore + host: + port: 27017 + database: + collection_name: +``` + +## Additional Details + +For additional details on how to specify a data store as well as the various settings available to modify in Jobflow, refer to the [API documentation](https://materialsproject.github.io/jobflow/jobflow.settings.html) for `jobflow.settings`. From 1badc258ad5f8df78cf5b29c5c5e9682e973ecb3 Mon Sep 17 00:00:00 2001 From: Alex Ganose Date: Sun, 10 Dec 2023 16:09:42 +0000 Subject: [PATCH 09/15] Create docs.yml --- .github/workflows/docs.yml | 55 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..d66dfb87 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,55 @@ +name: build-docs + +on: + workflow_dispatch: + push: + branches: [main] + +# set GITHUB_TOKEN permissions to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +jobs: + build-docs: + if: github.repository_owner == 'materialsproject' && github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.workflow_run.head_branch }} + + - name: Install pandoc + run: sudo apt-get install pandoc + + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + cache: pip + 
cache-dependency-path: pyproject.toml + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install .[strict,docs] + + - name: Build + run: sphinx-build docs docs_build + + - name: Upload build artifact + uses: actions/upload-pages-artifact@v2 + with: + path: ./docs_build + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + needs: build-docs + runs-on: ubuntu-latest + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 From 7524ccab9c2f1cd39d345dd05952d34a1f525a16 Mon Sep 17 00:00:00 2001 From: Alex Ganose Date: Sun, 10 Dec 2023 16:10:32 +0000 Subject: [PATCH 10/15] Delete .github/workflows/docs_manual.yml --- .github/workflows/docs_manual.yml | 38 ------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 .github/workflows/docs_manual.yml diff --git a/.github/workflows/docs_manual.yml b/.github/workflows/docs_manual.yml deleted file mode 100644 index fcaaf36d..00000000 --- a/.github/workflows/docs_manual.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: build-docs - -on: - workflow_dispatch: - -jobs: - - build-docs: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ github.event.workflow_run.head_branch }} - - - name: Install pandoc - run: sudo apt-get install pandoc - - - uses: actions/setup-python@v4 - with: - python-version: '3.10' - cache: pip - cache-dependency-path: pyproject.toml - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install .[strict,docs] - - - name: Build - run: sphinx-build docs docs_build - - - name: Deploy - uses: peaceiris/actions-gh-pages@v3 - with: - deploy_key: ${{ secrets.ACTIONS_DEPLOY_KEY }} - publish_dir: ./docs_build From 729b4070fe7a1cc8fd782f089776cacec6586b58 Mon Sep 17 00:00:00 2001 From: Alex Ganose Date: Sun, 10 Dec 2023 16:12:15 +0000 Subject: [PATCH 11/15] Update deploy.yml --- .github/workflows/deploy.yml | 4 +++- 1 file 
changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 7aa30979..656524a2 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -81,7 +81,9 @@ jobs: ref: ${{ github.event.workflow_run.head_branch }} - name: Write release info - run: awk 'BEGIN {p = 0} {a = 0 }; /^v\d*.\d*.\d*./ { p += 1; a = 1}; p + a == 1 { print } ' CHANGELOG.md | sed -e '1,1d' | sed -e '/./,$!d' -e :a -e '/^\n*$/{$d;N;ba' -e '}' > release_info.txt + awk 'BEGIN {p = 0} {a = 0 }; /^\#\#\ v\d*.\d*.\d*./ { p += 1; a = 1}; p + a == 1 { print } ' CHANGELOG.md | sed -e '1,1d' | sed -e '/./,$!d' -e :a -e '/^\n*$/{$d;N;ba' -e '}' > release_info.txt + echo "" >> release_info.txt + awk '/CONTRIBUTOR SECTION/{f=1; c=0} f' CHANGELOG.md >> release_info.txt - name: Release uses: actions/create-release@v1 From ebe0130845fb12a2e33272351570c3a3c11ed747 Mon Sep 17 00:00:00 2001 From: Alex Ganose Date: Sun, 10 Dec 2023 16:20:09 +0000 Subject: [PATCH 12/15] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3ac99790..ae0d6d64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description = "jobflow is a library for writing computational workflows" readme = "README.md" keywords = ["high-throughput", "workflow"] license = { text = "modified BSD" } -authors = [{ name = "Alex Ganose", email = "alexganose@gmail.com" }] +authors = [{ name = "Alex Ganose", email = "a.ganose@imperial.ac.uk" }] dynamic = ["version"] classifiers = [ "Development Status :: 5 - Production/Stable", From ae609e0afa5aea9c28086fd60c267482ab6ddbf0 Mon Sep 17 00:00:00 2001 From: Alex Ganose Date: Sun, 10 Dec 2023 16:23:34 +0000 Subject: [PATCH 13/15] Update deploy.yml --- .github/workflows/deploy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 656524a2..4f28238a 
100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,7 +10,7 @@ jobs: deploy-docs: # only run if commit is a push to master and the testing finished - if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' && startsWith(github.event.workflow_run.head_branch, 'v0.') }} + if: ${{ github.repository_owner == 'materialsproject' && github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' && startsWith(github.event.workflow_run.head_branch, 'v0.') }} runs-on: ubuntu-latest steps: @@ -44,7 +44,7 @@ jobs: deploy-pypi: # only run if commit is tagged as a version and the docs finished - if: ${{ startsWith(github.event.workflow_run.head_branch, 'v') }} + if: github.repository_owner == 'materialsproject' && github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' && startsWith(github.event.workflow_run.head_branch, 'v0.') runs-on: ubuntu-latest needs: - deploy-docs From 6f4d5121d05d2ff92da088900fc1c04859c0d97e Mon Sep 17 00:00:00 2001 From: Alex Ganose Date: Sun, 10 Dec 2023 16:27:59 +0000 Subject: [PATCH 14/15] Update PULL_REQUEST_TEMPLATE.md --- .github/PULL_REQUEST_TEMPLATE.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e893a016..9d2a61ee 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -46,4 +46,5 @@ Before a pull request can be merged, the following items must be checked: Note that the CI system will run all the above checks. But it will be much more efficient if you already fix most errors prior to submitting the PR. It is highly recommended that you use the pre-commit hook provided in the repository. Simply -`cp pre-commit .git/hooks` and a check will be run prior to allowing commits. 
+`pip install pre-commit` and then `pre-commit install` and a check will be run +prior to allowing commits. From ad2dbb8fe0c13870417a3a4c0823fb6245fda11e Mon Sep 17 00:00:00 2001 From: Alex Ganose Date: Sun, 10 Dec 2023 16:30:52 +0000 Subject: [PATCH 15/15] Fix linting --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9d2a61ee..100d80a3 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -46,5 +46,5 @@ Before a pull request can be merged, the following items must be checked: Note that the CI system will run all the above checks. But it will be much more efficient if you already fix most errors prior to submitting the PR. It is highly recommended that you use the pre-commit hook provided in the repository. Simply -`pip install pre-commit` and then `pre-commit install` and a check will be run +`pip install pre-commit` and then `pre-commit install` and a check will be run prior to allowing commits.