Merge branch 'main' into fix_manager_config_dynamic_jobs
Fabian Peschel committed Dec 11, 2023
2 parents 1619bb0 + ad2dbb8 commit 393e71d
Showing 21 changed files with 203 additions and 89 deletions.
3 changes: 2 additions & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -46,4 +46,5 @@ Before a pull request can be merged, the following items must be checked:
Note that the CI system will run all the above checks. But it will be much more
efficient if you already fix most errors prior to submitting the PR. It is highly
recommended that you use the pre-commit hook provided in the repository. Simply
-`cp pre-commit .git/hooks` and a check will be run prior to allowing commits.
+`pip install pre-commit` and then `pre-commit install` and a check will be run
+prior to allowing commits.
8 changes: 5 additions & 3 deletions .github/workflows/deploy.yml
@@ -10,7 +10,7 @@ jobs:
deploy-docs:

# only run if commit is a push to master and the testing finished
-if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' && startsWith(github.event.workflow_run.head_branch, 'v0.') }}
+if: ${{ github.repository_owner == 'materialsproject' && github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' && startsWith(github.event.workflow_run.head_branch, 'v0.') }}
runs-on: ubuntu-latest

steps:
@@ -44,7 +44,7 @@ jobs:
deploy-pypi:

# only run if commit is tagged as a version and the docs finished
-if: ${{ startsWith(github.event.workflow_run.head_branch, 'v') }}
+if: github.repository_owner == 'materialsproject' && github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' && startsWith(github.event.workflow_run.head_branch, 'v0.')
runs-on: ubuntu-latest
needs:
- deploy-docs
@@ -81,7 +81,9 @@ jobs:
ref: ${{ github.event.workflow_run.head_branch }}

- name: Write release info
-run: awk 'BEGIN {p = 0} {a = 0 }; /^v\d*.\d*.\d*./ { p += 1; a = 1}; p + a == 1 { print } ' CHANGELOG.md | sed -e '1,1d' | sed -e '/./,$!d' -e :a -e '/^\n*$/{$d;N;ba' -e '}' > release_info.txt
+run: |
+  awk 'BEGIN {p = 0} {a = 0 }; /^\#\#\ v\d*.\d*.\d*./ { p += 1; a = 1}; p + a == 1 { print } ' CHANGELOG.md | sed -e '1,1d' | sed -e '/./,$!d' -e :a -e '/^\n*$/{$d;N;ba' -e '}' > release_info.txt
+  echo "" >> release_info.txt
+  awk '/CONTRIBUTOR SECTION/{f=1; c=0} f' CHANGELOG.md >> release_info.txt

- name: Release
uses: actions/create-release@v1
55 changes: 55 additions & 0 deletions .github/workflows/docs.yml
@@ -0,0 +1,55 @@
name: build-docs

on:
  workflow_dispatch:
  push:
    branches: [main]

# set GITHUB_TOKEN permissions to allow deployment to GitHub Pages
permissions:
  contents: read
  pages: write
  id-token: write

jobs:
  build-docs:
    if: github.repository_owner == 'materialsproject' && github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          ref: ${{ github.event.workflow_run.head_branch }}

      - name: Install pandoc
        run: sudo apt-get install pandoc

      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
          cache: pip
          cache-dependency-path: pyproject.toml

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install .[strict,docs]
      - name: Build
        run: sphinx-build docs docs_build

      - name: Upload build artifact
        uses: actions/upload-pages-artifact@v2
        with:
          path: ./docs_build

  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    needs: build-docs
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v2
38 changes: 0 additions & 38 deletions .github/workflows/docs_manual.yml

This file was deleted.

29 changes: 29 additions & 0 deletions .github/workflows/link-check.yml
@@ -0,0 +1,29 @@
name: Check Links

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

jobs:
  check_links:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          pip install pytest-check-links nbconvert
      - name: Run link check
        run: |
          pytest --check-links **/**/*.md **/**/*.ipynb --check-links-ignore "https://www.gauss-centre.eu"
18 changes: 9 additions & 9 deletions .pre-commit-config.yaml
@@ -3,7 +3,7 @@ default_language_version:
exclude: "^src/atomate2/vasp/schemas/calc_types/"
repos:
- repo: https://github.com/charliermarsh/ruff-pre-commit
-rev: v0.1.3
+rev: v0.1.7
hooks:
- id: ruff
args: [--fix]
@@ -23,18 +23,18 @@ repos:
additional_dependencies: [black]
exclude: ^(README.md|paper/paper.md)$
- repo: https://github.com/pycqa/flake8
-rev: 6.0.0
+rev: 6.1.0
hooks:
- id: flake8
entry: pflake8
files: ^src/
additional_dependencies:
-- pyproject-flake8==6.0.0
-- flake8-bugbear==22.12.6
-- flake8-typing-imports==1.14.0
-- flake8-docstrings==1.6.0
-- flake8-rst-docstrings==0.3.0
-- flake8-rst==0.8.0
+- pyproject-flake8
+- flake8-bugbear
+- flake8-typing-imports
+- flake8-docstrings
+- flake8-rst-docstrings
+- flake8-rst
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
hooks:
@@ -43,7 +43,7 @@ repos:
- id: rst-directive-colons
- id: rst-inline-touching-normal
- repo: https://github.com/pre-commit/mirrors-mypy
-rev: v1.6.1
+rev: v1.7.1
hooks:
- id: mypy
files: ^src/
1 change: 1 addition & 0 deletions docs/index.rst
@@ -6,6 +6,7 @@
Install jobflow <install>
Install FireWorks (Optional) <install_fireworks>
Tutorials <tutorials>
+Configuring Data Stores <stores>

.. toctree::
:caption: Information
53 changes: 53 additions & 0 deletions docs/stores.md
@@ -0,0 +1,53 @@
# Stores

## Overview

Jobflow relies on the [maggma package](https://github.com/materialsproject/maggma) to provide a unified interface to a variety of data stores. By default, all calculations are run using a `MemoryStore`, which lives solely in the current process' memory and is lost when the process exits. In production calculations, one will generally want to use a persistent data store, such as a MongoDB database. This also allows one to run calculations in a distributed manner with a common data store.

For a list of all available data stores, refer to the [maggma documentation](https://materialsproject.github.io/maggma/getting_started/stores/#list-of-stores). Here, we will go over how to use Jobflow with MongoDB via a [`MongoStore`](https://materialsproject.github.io/maggma/reference/stores/#maggma.stores.mongolike.MongoStore).
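
As a quick illustration of the default behavior described above, the sketch below runs a tiny flow with no store configured, so the results land in a `MemoryStore` and disappear when the Python process exits. The `add` job is a hypothetical example written for this page, not part of jobflow itself.

```python
# Minimal sketch of the default, non-persistent behavior: with no store
# configured, run_locally() keeps all outputs in an in-memory store.
from jobflow import Flow, job
from jobflow.managers.local import run_locally

@job
def add(a, b):
    # hypothetical toy job for illustration
    return a + b

flow = Flow([add(1, 2)])
responses = run_locally(flow)  # outputs live only in this process' memory
print(responses)  # mapping of job uuid -> {index: Response}
```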

## Configuring a `MongoStore`

### Creating a `jobflow.yaml` File

To modify basic Jobflow settings, you will first need to make a `jobflow.yaml` file if you haven't done so already. You will then need to define a `JOBFLOW_CONFIG_FILE` environment variable pointing to the file you made. For instance, in your `~/.bashrc` file, add the following line:

```bash
export JOBFLOW_CONFIG_FILE="/path/to/my/jobflow.yaml"
```

If this environment variable is not specified, Jobflow will look for the file in `~/.jobflow.yaml`.
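
To confirm which configuration Jobflow has actually picked up, you can inspect the global settings object. This is a short sanity check that assumes only the documented `jobflow.SETTINGS` interface:

```python
# Sanity check of the active configuration; jobflow reads JOBFLOW_CONFIG_FILE
# (falling back to ~/.jobflow.yaml) when SETTINGS is first constructed.
import os

from jobflow import SETTINGS

print(os.environ.get("JOBFLOW_CONFIG_FILE"))  # the path Jobflow will read, if set
print(SETTINGS.JOB_STORE)  # the JobStore built from your configuration
```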

### Basic Configuration

In your `jobflow.yaml`, copy the example below and fill in the fields with the appropriate values for your MongoDB store.

```yaml title="jobflow.yaml"
JOB_STORE:
docs_store:
type: MongoStore
host: <host name>
port: 27017
username: <username>
password: <password>
database: <database name>
collection_name: <collection name>
```
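
If you prefer to construct the store in code rather than via `jobflow.yaml` (for example, in a one-off script), the YAML above corresponds roughly to the following sketch; the angle-bracket placeholders are yours to fill in:

```python
# Programmatic equivalent of the YAML configuration above (a sketch; for
# production use, the jobflow.yaml route is usually more convenient).
from jobflow import JobStore
from maggma.stores import MongoStore

docs_store = MongoStore(
    database="<database name>",
    collection_name="<collection name>",
    host="<host name>",
    port=27017,
    username="<username>",
    password="<password>",
)
store = JobStore(docs_store)

# The store can then be passed to the local manager explicitly, e.g.:
# run_locally(flow, store=store)
```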

### MongoDB Atlas

If you are using a URI (as is common with MongoDB Atlas), then you will instead have a `jobflow.yaml` file that looks like the example below. Here, you will put the full URI in the `host` field. The `username` and `password` are part of the URI and so should not be included elsewhere in the YAML file.

```yaml title="jobflow.yaml"
JOB_STORE:
docs_store:
type: MongoStore
host: <URI>
port: 27017
database: <database name>
collection_name: <collection name>
```
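
As an alternative (this is an assumption about maggma's API rather than something the Jobflow documentation prescribes), Atlas-style connection strings can also be handled with maggma's `MongoURIStore`, sketched below with keyword arguments so as not to rely on parameter order:

```python
# Sketch using maggma's URI-based store for Atlas-style connection strings;
# treat the exact keyword names as assumptions and check the maggma docs.
from jobflow import JobStore
from maggma.stores import MongoURIStore

docs_store = MongoURIStore(
    uri="mongodb+srv://<username>:<password>@<cluster address>",
    database="<database name>",
    collection_name="<collection name>",
)
store = JobStore(docs_store)
```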

## Additional Details

For additional details on how to specify a data store as well as the various settings available to modify in Jobflow, refer to the [API documentation](https://materialsproject.github.io/jobflow/jobflow.settings.html) for `jobflow.settings`.
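
One such setting worth highlighting is `additional_stores`, which lets a `JobStore` route selected outputs (large files or binary blobs, say) into a secondary store. A hedged sketch follows; the store name `"data"` and the collection names are illustrative, though the `additional_stores` keyword itself appears in jobflow's own settings code.

```python
# Sketch: routing large outputs to a secondary "data" store. The GridFSStore
# arguments mirror MongoStore's; the names used here are not required by jobflow.
from jobflow import JobStore
from maggma.stores import GridFSStore, MongoStore

store = JobStore(
    MongoStore(
        database="<database name>",
        collection_name="outputs",
        host="<host name>",
    ),
    additional_stores={
        "data": GridFSStore(
            database="<database name>",
            collection_name="data_blobs",
            host="<host name>",
        )
    },
)
```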
2 changes: 1 addition & 1 deletion docs/tutorials.rst
@@ -14,4 +14,4 @@ Tutorials
tutorials/8-fireworks

.. Note::
-[@jageo](https://github.com/JaGeo) also has a set of [Jobflow tutorials](https://jageo.github.io/Advanced_Jobflow_Tutorial/intro.html) written within the context of computational materials science applications, which you may wish to check out after exploring the basics here.
+`@jageo <https://github.com/JaGeo>`_ also has a set of `Jobflow tutorials <https://jageo.github.io/Advanced_Jobflow_Tutorial/intro.html>`_ written within the context of computational materials science applications, which you may wish to check out after exploring the basics here.
54 changes: 30 additions & 24 deletions docs/tutorials/2-introduction.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/tutorials/3-defining-jobs.ipynb
@@ -103,7 +103,7 @@
"id": "fatal-bible",
"metadata": {},
"source": [
"Jobs also have an index. This tracks the number of times the job has been \"replaced\" (replacing is covered in detail in the [Dynamic and nested Flows tutorial](dynamic-flows)).\n"
"Jobs also have an index. This tracks the number of times the job has been \"replaced\" (replacing is covered in detail in the [Dynamic and nested Flows tutorial](5-dynamic-flows.html)).\n"
]
},
{
@@ -233,7 +233,7 @@
"source": [
"from jobflow.managers.local import run_locally\n",
"\n",
"response = run_locally(add(1,2))"
"response = run_locally(add(1, 2))"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion paper/paper.md
@@ -206,6 +206,6 @@ Naturally, the summary presented in this article constitutes only a small subset

# Acknowledgements

-This work was primarily funded and intellectually led by the Materials Project, which is funded by the U.S. Department of Energy, Office of Science, Office of Basic Energy Sciences, Materials Sciences and Engineering Division, under Contract no. DE-AC02-05-CH11231: Materials Project program KC23MP. A.S.R. acknowledges support via a Miller Research Fellowship from the Miller Institute for Basic Research in Science, University of California, Berkeley. J.G would like to acknowledge the Gauss Centre for Supercomputing e.V. ([www.gauss-centre.eu](http://www.gauss-centre.eu/)) for funding workflow-related developments by providing generous computing time on the GCS Supercomputer SuperMUC-NG at Leibniz Supercomputing Centre ([www.lrz.de](http://www.lrz.de/)) (Project pn73da). J.R. acknowledges support from the German Academic Scholarship Foundation (Studienstiftung). M.L.E. thanks the BEWARE scheme of the Wallonia-Brussels Federation for funding under the European Commission's Marie Curie-Skłodowska Action (COFUND 847587). G.P. and D.W. acknowledge Umicore for the financial support in developing the remote execution mode of jobflow. D.W. and G.M.R. acknowledge funding from the European Union’s Horizon 2020 research and innovation program under the grant agreement No 951786 (NOMAD CoE). A.M.G. is supported by EPSRC Fellowship EP/T033231/1.
+This work was primarily funded and intellectually led by the Materials Project, which is funded by the U.S. Department of Energy, Office of Science, Office of Basic Energy Sciences, Materials Sciences and Engineering Division, under Contract no. DE-AC02-05-CH11231: Materials Project program KC23MP. A.S.R. acknowledges support via a Miller Research Fellowship from the Miller Institute for Basic Research in Science, University of California, Berkeley. J.G would like to acknowledge the Gauss Centre for Supercomputing e.V. (<https://www.gauss-centre.eu>) for funding workflow-related developments by providing generous computing time on the GCS Supercomputer SuperMUC-NG at Leibniz Supercomputing Centre ([www.lrz.de](http://www.lrz.de/)) (Project pn73da). J.R. acknowledges support from the German Academic Scholarship Foundation (Studienstiftung). M.L.E. thanks the BEWARE scheme of the Wallonia-Brussels Federation for funding under the European Commission's Marie Curie-Skłodowska Action (COFUND 847587). G.P. and D.W. acknowledge Umicore for the financial support in developing the remote execution mode of jobflow. D.W. and G.M.R. acknowledge funding from the European Union’s Horizon 2020 research and innovation program under the grant agreement No 951786 (NOMAD CoE). A.M.G. is supported by EPSRC Fellowship EP/T033231/1.

# References
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -8,7 +8,7 @@ description = "jobflow is a library for writing computational workflows"
readme = "README.md"
keywords = ["high-throughput", "workflow"]
license = { text = "modified BSD" }
-authors = [{ name = "Alex Ganose", email = "[email protected]" }]
+authors = [{ name = "Alex Ganose", email = "[email protected]" }]
dynamic = ["version"]
classifiers = [
"Development Status :: 5 - Production/Stable",
3 changes: 2 additions & 1 deletion src/jobflow/core/flow.py
@@ -274,7 +274,8 @@ def output(self, output: Any):
f"Flow '{self.name}' contains a Flow or Job as an output. "
f"Usually the Flow output should be the output of a Job or "
f"another Flow (e.g. job.output). If this message is "
f"unexpected then double check the outputs of your Flow."
f"unexpected then double check the outputs of your Flow.",
stacklevel=2,
)

# check if the jobs array contains all jobs needed for the references
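A note on the `stacklevel=2` arguments added throughout this commit (here and in `job.py`, `settings.py`, and `graph.py` below): they all serve the same purpose. A small standalone sketch, not taken from jobflow, of what the argument changes:

```python
# With stacklevel=2 the warning is attributed to the caller of validate(),
# which is usually the line a user can actually fix, rather than to the
# warnings.warn() call inside the library.
import warnings

def validate(value):
    if value is None:
        warnings.warn("value is None; falling back to a default", stacklevel=2)

validate(None)  # the emitted warning points at this line
```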
3 changes: 2 additions & 1 deletion src/jobflow/core/job.py
@@ -359,7 +359,8 @@ def __init__(
f"Job '{self.name}' contains an Flow or Job as an input. "
f"Usually inputs should be the output of a Job or an Flow (e.g. "
f"job.output). If this message is unexpected then double check the "
f"inputs to your Job."
f"inputs to your Job.",
stacklevel=2,
)

def __repr__(self):
2 changes: 1 addition & 1 deletion src/jobflow/core/store.py
@@ -282,7 +282,7 @@ def update(

from jobflow.utils.find import find_key, update_in_dictionary

-if save is None or save is True:
+if save in (None, True):
save = self.save

save_keys = _prepare_save(save)
2 changes: 1 addition & 1 deletion src/jobflow/managers/local.py
@@ -156,7 +156,7 @@ def _run(root_flow):
response, jobflow_stopped = _run_job(job, parents)

encountered_bad_response = encountered_bad_response or response is None
-if jobflow_stopped is True:
+if jobflow_stopped:
return False

return not encountered_bad_response
5 changes: 3 additions & 2 deletions src/jobflow/settings.py
@@ -107,7 +107,7 @@ class JobflowSettings(BaseSettings):
JOB_STORE: JobStore = Field(
default_factory=lambda: JobStore(
MemoryStore(),
-additional_stores=defaultdict(lambda: _default_additional_store()),
+additional_stores=defaultdict(_default_additional_store),
),
description="Default JobStore to use when running locally or using FireWorks. "
"See the :obj:`JobflowSettings` docstring for more details on the "
@@ -137,7 +137,8 @@ def load_default_settings(cls, values):
if Path(config_file_path).exists():
if Path(config_file_path).stat().st_size == 0:
warnings.warn(
f"An empty JobFlow config file was located at {config_file_path}"
f"An empty JobFlow config file was located at {config_file_path}",
stacklevel=2,
)
else:
try:
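The `defaultdict` change in the first hunk above drops a redundant lambda: `defaultdict` accepts any zero-argument callable as its factory, so wrapping one in a lambda adds nothing. A standalone sketch, independent of jobflow:

```python
# defaultdict(factory) and defaultdict(lambda: factory()) behave identically;
# the direct form simply removes one layer of indirection.
from collections import defaultdict

def make_bucket():  # hypothetical zero-argument factory
    return []

d1 = defaultdict(lambda: make_bucket())
d2 = defaultdict(make_bucket)
d1["x"].append(1)
d2["x"].append(1)
assert d1["x"] == d2["x"]
```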
2 changes: 1 addition & 1 deletion src/jobflow/utils/enum.py
@@ -12,7 +12,7 @@ def __str__(self):

def __eq__(self, other):
"""Compare to another enum for equality."""
-if type(self) == type(other) and self.value == other.value:
+if type(self) is type(other) and self.value == other.value:
return True
return str(self.value) == str(other)

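The switch from `==` to `is` above is the usual fix for pycodestyle's E721 rule (also implemented by ruff), which asks for identity comparison in exact-type checks. A short illustration, independent of jobflow:

```python
# type(a) is type(b) is the idiomatic exact-type check: identity cannot be
# spoofed by a custom __eq__, and it never matches subclasses.
class Base:
    pass

class Sub(Base):
    pass

a, b = Base(), Sub()
print(type(a) is type(b))   # False: the exact same class is required
print(isinstance(b, Base))  # True: isinstance accepts subclasses as well
```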
2 changes: 1 addition & 1 deletion src/jobflow/utils/find.py
@@ -233,7 +233,7 @@ def get_root_locations(locations):
>>> _get_root_locations([["a", "b"], ["a"], ["c", "d"]])
[["a"], ["c", "d"]]
"""
-sorted_locs = sorted(locations, key=lambda x: len(x))
+sorted_locs = sorted(locations, key=len)
root_locations = []
for loc in sorted_locs:
if any(loc[: len(rloc)] == rloc for rloc in root_locations):
4 changes: 3 additions & 1 deletion src/jobflow/utils/graph.py
@@ -46,7 +46,9 @@ def itergraph(graph: nx.DiGraph):
subgraphs = [graph.subgraph(c) for c in nx.weakly_connected_components(graph)]

if len(subgraphs) > 1:
warnings.warn("Some jobs are not connected, their ordering may be random")
warnings.warn(
"Some jobs are not connected, their ordering may be random", stacklevel=2
)

for subgraph in subgraphs:
yield from nx.topological_sort(subgraph)
