Merge branch 'main' into fix_manager_config_dynamic_jobs
Fabian Peschel committed Dec 11, 2023
2 parents 1619bb0 + ad2dbb8 commit 393e71d
Showing 21 changed files with 203 additions and 89 deletions.
3 changes: 2 additions & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -46,4 +46,5 @@ Before a pull request can be merged, the following items must be checked:
Note that the CI system will run all the above checks. But it will be much more
efficient if you already fix most errors prior to submitting the PR. It is highly
recommended that you use the pre-commit hook provided in the repository. Simply
-`cp pre-commit .git/hooks` and a check will be run prior to allowing commits.
+`pip install pre-commit` and then `pre-commit install` and a check will be run
+prior to allowing commits.
8 changes: 5 additions & 3 deletions .github/workflows/deploy.yml
@@ -10,7 +10,7 @@ jobs:
deploy-docs:

# only run if commit is a push to master and the testing finished
-if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' && startsWith(github.event.workflow_run.head_branch, 'v0.') }}
+if: ${{ github.repository_owner == 'materialsproject' && github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' && startsWith(github.event.workflow_run.head_branch, 'v0.') }}
runs-on: ubuntu-latest

steps:
@@ -44,7 +44,7 @@ jobs:
deploy-pypi:

# only run if commit is tagged as a version and the docs finished
-if: ${{ startsWith(github.event.workflow_run.head_branch, 'v') }}
+if: github.repository_owner == 'materialsproject' && github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' && startsWith(github.event.workflow_run.head_branch, 'v0.')
runs-on: ubuntu-latest
needs:
- deploy-docs
@@ -81,7 +81,9 @@ jobs:
ref: ${{ github.event.workflow_run.head_branch }}

- name: Write release info
-run: awk 'BEGIN {p = 0} {a = 0 }; /^v\d*.\d*.\d*./ { p += 1; a = 1}; p + a == 1 { print } ' CHANGELOG.md | sed -e '1,1d' | sed -e '/./,$!d' -e :a -e '/^\n*$/{$d;N;ba' -e '}' > release_info.txt
+run: |
+  awk 'BEGIN {p = 0} {a = 0 }; /^\#\#\ v\d*.\d*.\d*./ { p += 1; a = 1}; p + a == 1 { print } ' CHANGELOG.md | sed -e '1,1d' | sed -e '/./,$!d' -e :a -e '/^\n*$/{$d;N;ba' -e '}' > release_info.txt
+  echo "" >> release_info.txt
+  awk '/CONTRIBUTOR SECTION/{f=1; c=0} f' CHANGELOG.md >> release_info.txt

- name: Release
uses: actions/create-release@v1
55 changes: 55 additions & 0 deletions .github/workflows/docs.yml
@@ -0,0 +1,55 @@
name: build-docs

on:
  workflow_dispatch:
  push:
    branches: [main]

# set GITHUB_TOKEN permissions to allow deployment to GitHub Pages
permissions:
  contents: read
  pages: write
  id-token: write

jobs:
  build-docs:
    if: github.repository_owner == 'materialsproject' && github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
        with:
          ref: ${{ github.event.workflow_run.head_branch }}

      - name: Install pandoc
        run: sudo apt-get install pandoc

      - uses: actions/setup-python@v4
        with:
          python-version: "3.10"
          cache: pip
          cache-dependency-path: pyproject.toml

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install .[strict,docs]
      - name: Build
        run: sphinx-build docs docs_build

      - name: Upload build artifact
        uses: actions/upload-pages-artifact@v2
        with:
          path: ./docs_build

  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    needs: build-docs
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v2
38 changes: 0 additions & 38 deletions .github/workflows/docs_manual.yml

This file was deleted.

29 changes: 29 additions & 0 deletions .github/workflows/link-check.yml
@@ -0,0 +1,29 @@
name: Check Links

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

jobs:
  check_links:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          pip install pytest-check-links nbconvert
      - name: Run link check
        run: |
          pytest --check-links **/**/*.md **/**/*.ipynb --check-links-ignore "https://www.gauss-centre.eu"
18 changes: 9 additions & 9 deletions .pre-commit-config.yaml
@@ -3,7 +3,7 @@ default_language_version:
exclude: "^src/atomate2/vasp/schemas/calc_types/"
repos:
- repo: https://github.com/charliermarsh/ruff-pre-commit
-rev: v0.1.3
+rev: v0.1.7
hooks:
- id: ruff
args: [--fix]
@@ -23,18 +23,18 @@ repos:
additional_dependencies: [black]
exclude: ^(README.md|paper/paper.md)$
- repo: https://github.com/pycqa/flake8
-rev: 6.0.0
+rev: 6.1.0
hooks:
- id: flake8
entry: pflake8
files: ^src/
additional_dependencies:
-- pyproject-flake8==6.0.0
-- flake8-bugbear==22.12.6
-- flake8-typing-imports==1.14.0
-- flake8-docstrings==1.6.0
-- flake8-rst-docstrings==0.3.0
-- flake8-rst==0.8.0
+- pyproject-flake8
+- flake8-bugbear
+- flake8-typing-imports
+- flake8-docstrings
+- flake8-rst-docstrings
+- flake8-rst
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
hooks:
@@ -43,7 +43,7 @@ repos:
- id: rst-directive-colons
- id: rst-inline-touching-normal
- repo: https://github.com/pre-commit/mirrors-mypy
-rev: v1.6.1
+rev: v1.7.1
hooks:
- id: mypy
files: ^src/
1 change: 1 addition & 0 deletions docs/index.rst
@@ -6,6 +6,7 @@
Install jobflow <install>
Install FireWorks (Optional) <install_fireworks>
Tutorials <tutorials>
+Configuring Data Stores <stores>

.. toctree::
:caption: Information
53 changes: 53 additions & 0 deletions docs/stores.md
@@ -0,0 +1,53 @@
# Stores

## Overview

Jobflow relies on the [maggma package](https://github.com/materialsproject/maggma) to provide a unified interface to a variety of data stores. By default, all calculations are run using a `MemoryStore`, which lives solely in the current process' memory and is lost when the process exits. In production calculations, one will generally want to use a persistent data store, such as a MongoDB database. This also allows one to run calculations in a distributed manner with a common data store.

For a list of all available data stores, refer to the [maggma documentation](https://materialsproject.github.io/maggma/getting_started/stores/#list-of-stores). Here, we will go over how to use Jobflow with MongoDB via a [`MongoStore`](https://materialsproject.github.io/maggma/reference/stores/#maggma.stores.mongolike.MongoStore).
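
As a quick illustration of the default behavior described above, the sketch below runs a tiny flow with no store configured, so the results land in a `MemoryStore` and disappear when the Python process exits. The `add` job is a hypothetical example written for this page, not part of jobflow itself.

```python
# Minimal sketch of the default, non-persistent behavior: with no store
# configured, run_locally() keeps all outputs in an in-memory store.
from jobflow import Flow, job
from jobflow.managers.local import run_locally

@job
def add(a, b):
    # hypothetical toy job for illustration
    return a + b

flow = Flow([add(1, 2)])
responses = run_locally(flow)  # outputs live only in this process' memory
print(responses)  # mapping of job uuid -> {index: Response}
```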

## Configuring a `MongoStore`

### Creating a `jobflow.yaml` File

To modify basic Jobflow settings, you will first need to make a `jobflow.yaml` file if you haven't done so already. You will then need to define a `JOBFLOW_CONFIG_FILE` environment variable pointing to the file you made. For instance, in your `~/.bashrc` file, add the following line:

```bash
export JOBFLOW_CONFIG_FILE="/path/to/my/jobflow.yaml"
```

If this environment variable is not specified, Jobflow will look for the file in `~/.jobflow.yaml`.
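
To confirm which configuration Jobflow has actually picked up, you can inspect the global settings object. This is a short sanity check that assumes only the documented `jobflow.SETTINGS` interface:

```python
# Sanity check of the active configuration; jobflow reads JOBFLOW_CONFIG_FILE
# (falling back to ~/.jobflow.yaml) when SETTINGS is first constructed.
import os

from jobflow import SETTINGS

print(os.environ.get("JOBFLOW_CONFIG_FILE"))  # the path Jobflow will read, if set
print(SETTINGS.JOB_STORE)  # the JobStore built from your configuration
```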

### Basic Configuration

In your `jobflow.yaml`, copy the example below and fill in the fields with the appropriate values for your MongoDB store.

```yaml title="jobflow.yaml"
JOB_STORE:
docs_store:
type: MongoStore
host: <host name>
port: 27017
username: <username>
password: <password>
database: <database name>
collection_name: <collection name>
```
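
If you prefer to construct the store in code rather than via `jobflow.yaml` (for example, in a one-off script), the YAML above corresponds roughly to the following sketch; the angle-bracket placeholders are yours to fill in:

```python
# Programmatic equivalent of the YAML configuration above (a sketch; for
# production use, the jobflow.yaml route is usually more convenient).
from jobflow import JobStore
from maggma.stores import MongoStore

docs_store = MongoStore(
    database="<database name>",
    collection_name="<collection name>",
    host="<host name>",
    port=27017,
    username="<username>",
    password="<password>",
)
store = JobStore(docs_store)

# The store can then be passed to the local manager explicitly, e.g.:
# run_locally(flow, store=store)
```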

### MongoDB Atlas

If you are using a URI (as is common with MongoDB Atlas), then you will instead have a `jobflow.yaml` file that looks like the example below. Here, you will put the full URI in the `host` field. The `username` and `password` are part of the URI and so should not be included elsewhere in the YAML file.

```yaml title="jobflow.yaml"
JOB_STORE:
docs_store:
type: MongoStore
host: <URI>
port: 27017
database: <database name>
collection_name: <collection name>
```
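
As an alternative (this is an assumption about maggma's API rather than something the Jobflow documentation prescribes), Atlas-style connection strings can also be handled with maggma's `MongoURIStore`, sketched below with keyword arguments so as not to rely on parameter order:

```python
# Sketch using maggma's URI-based store for Atlas-style connection strings;
# treat the exact keyword names as assumptions and check the maggma docs.
from jobflow import JobStore
from maggma.stores import MongoURIStore

docs_store = MongoURIStore(
    uri="mongodb+srv://<username>:<password>@<cluster address>",
    database="<database name>",
    collection_name="<collection name>",
)
store = JobStore(docs_store)
```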

## Additional Details

For additional details on how to specify a data store as well as the various settings available to modify in Jobflow, refer to the [API documentation](https://materialsproject.github.io/jobflow/jobflow.settings.html) for `jobflow.settings`.
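
One such setting worth highlighting is `additional_stores`, which lets a `JobStore` route selected outputs (large files or binary blobs, say) into a secondary store. A hedged sketch follows; the store name `"data"` and the collection names are illustrative, though the `additional_stores` keyword itself appears in jobflow's own settings code.

```python
# Sketch: routing large outputs to a secondary "data" store. The GridFSStore
# arguments mirror MongoStore's; the names used here are not required by jobflow.
from jobflow import JobStore
from maggma.stores import GridFSStore, MongoStore

store = JobStore(
    MongoStore(
        database="<database name>",
        collection_name="outputs",
        host="<host name>",
    ),
    additional_stores={
        "data": GridFSStore(
            database="<database name>",
            collection_name="data_blobs",
            host="<host name>",
        )
    },
)
```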
2 changes: 1 addition & 1 deletion docs/tutorials.rst
@@ -14,4 +14,4 @@ Tutorials
tutorials/8-fireworks

.. Note::
-[@jageo](https://github.com/JaGeo) also has a set of [Jobflow tutorials](https://jageo.github.io/Advanced_Jobflow_Tutorial/intro.html) written within the context of computational materials science applications, which you may wish to check out after exploring the basics here.
+`@jageo <https://github.com/JaGeo>`_ also has a set of `Jobflow tutorials <https://jageo.github.io/Advanced_Jobflow_Tutorial/intro.html>`_ written within the context of computational materials science applications, which you may wish to check out after exploring the basics here.
54 changes: 30 additions & 24 deletions docs/tutorials/2-introduction.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/tutorials/3-defining-jobs.ipynb
@@ -103,7 +103,7 @@
"id": "fatal-bible",
"metadata": {},
"source": [
"Jobs also have an index. This tracks the number of times the job has been \"replaced\" (replacing is covered in detail in the [Dynamic and nested Flows tutorial](dynamic-flows)).\n"
"Jobs also have an index. This tracks the number of times the job has been \"replaced\" (replacing is covered in detail in the [Dynamic and nested Flows tutorial](5-dynamic-flows.html)).\n"
]
},
{
@@ -233,7 +233,7 @@
"source": [
"from jobflow.managers.local import run_locally\n",
"\n",
"response = run_locally(add(1,2))"
"response = run_locally(add(1, 2))"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion paper/paper.md
@@ -206,6 +206,6 @@ Naturally, the summary presented in this article constitutes only a small subset

# Acknowledgements

-This work was primarily funded and intellectually led by the Materials Project, which is funded by the U.S. Department of Energy, Office of Science, Office of Basic Energy Sciences, Materials Sciences and Engineering Division, under Contract no. DE-AC02-05-CH11231: Materials Project program KC23MP. A.S.R. acknowledges support via a Miller Research Fellowship from the Miller Institute for Basic Research in Science, University of California, Berkeley. J.G would like to acknowledge the Gauss Centre for Supercomputing e.V. ([www.gauss-centre.eu](http://www.gauss-centre.eu/)) for funding workflow-related developments by providing generous computing time on the GCS Supercomputer SuperMUC-NG at Leibniz Supercomputing Centre ([www.lrz.de](http://www.lrz.de/)) (Project pn73da). J.R. acknowledges support from the German Academic Scholarship Foundation (Studienstiftung). M.L.E. thanks the BEWARE scheme of the Wallonia-Brussels Federation for funding under the European Commission's Marie Curie-Skłodowska Action (COFUND 847587). G.P. and D.W. acknowledge Umicore for the financial support in developing the remote execution mode of jobflow. D.W. and G.M.R. acknowledge funding from the European Union’s Horizon 2020 research and innovation program under the grant agreement No 951786 (NOMAD CoE). A.M.G. is supported by EPSRC Fellowship EP/T033231/1.
+This work was primarily funded and intellectually led by the Materials Project, which is funded by the U.S. Department of Energy, Office of Science, Office of Basic Energy Sciences, Materials Sciences and Engineering Division, under Contract no. DE-AC02-05-CH11231: Materials Project program KC23MP. A.S.R. acknowledges support via a Miller Research Fellowship from the Miller Institute for Basic Research in Science, University of California, Berkeley. J.G would like to acknowledge the Gauss Centre for Supercomputing e.V. (<https://www.gauss-centre.eu>) for funding workflow-related developments by providing generous computing time on the GCS Supercomputer SuperMUC-NG at Leibniz Supercomputing Centre ([www.lrz.de](http://www.lrz.de/)) (Project pn73da). J.R. acknowledges support from the German Academic Scholarship Foundation (Studienstiftung). M.L.E. thanks the BEWARE scheme of the Wallonia-Brussels Federation for funding under the European Commission's Marie Curie-Skłodowska Action (COFUND 847587). G.P. and D.W. acknowledge Umicore for the financial support in developing the remote execution mode of jobflow. D.W. and G.M.R. acknowledge funding from the European Union’s Horizon 2020 research and innovation program under the grant agreement No 951786 (NOMAD CoE). A.M.G. is supported by EPSRC Fellowship EP/T033231/1.

# References
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -8,7 +8,7 @@ description = "jobflow is a library for writing computational workflows"
readme = "README.md"
keywords = ["high-throughput", "workflow"]
license = { text = "modified BSD" }
-authors = [{ name = "Alex Ganose", email = "[email protected]" }]
+authors = [{ name = "Alex Ganose", email = "[email protected]" }]
dynamic = ["version"]
classifiers = [
"Development Status :: 5 - Production/Stable",
3 changes: 2 additions & 1 deletion src/jobflow/core/flow.py
@@ -274,7 +274,8 @@ def output(self, output: Any):
f"Flow '{self.name}' contains a Flow or Job as an output. "
f"Usually the Flow output should be the output of a Job or "
f"another Flow (e.g. job.output). If this message is "
f"unexpected then double check the outputs of your Flow."
f"unexpected then double check the outputs of your Flow.",
stacklevel=2,
)

# check if the jobs array contains all jobs needed for the references
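A note on the `stacklevel=2` arguments added throughout this commit (here and in `job.py`, `settings.py`, and `graph.py` below): they all serve the same purpose. A small standalone sketch, not taken from jobflow, of what the argument changes:

```python
# With stacklevel=2 the warning is attributed to the caller of validate(),
# which is usually the line a user can actually fix, rather than to the
# warnings.warn() call inside the library.
import warnings

def validate(value):
    if value is None:
        warnings.warn("value is None; falling back to a default", stacklevel=2)

validate(None)  # the emitted warning points at this line
```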
3 changes: 2 additions & 1 deletion src/jobflow/core/job.py
@@ -359,7 +359,8 @@ def __init__(
f"Job '{self.name}' contains an Flow or Job as an input. "
f"Usually inputs should be the output of a Job or an Flow (e.g. "
f"job.output). If this message is unexpected then double check the "
f"inputs to your Job."
f"inputs to your Job.",
stacklevel=2,
)

def __repr__(self):
2 changes: 1 addition & 1 deletion src/jobflow/core/store.py
@@ -282,7 +282,7 @@ def update(

from jobflow.utils.find import find_key, update_in_dictionary

-if save is None or save is True:
+if save in (None, True):
save = self.save

save_keys = _prepare_save(save)
2 changes: 1 addition & 1 deletion src/jobflow/managers/local.py
@@ -156,7 +156,7 @@ def _run(root_flow):
response, jobflow_stopped = _run_job(job, parents)

encountered_bad_response = encountered_bad_response or response is None
-if jobflow_stopped is True:
+if jobflow_stopped:
return False

return not encountered_bad_response
5 changes: 3 additions & 2 deletions src/jobflow/settings.py
@@ -107,7 +107,7 @@ class JobflowSettings(BaseSettings):
JOB_STORE: JobStore = Field(
default_factory=lambda: JobStore(
MemoryStore(),
-additional_stores=defaultdict(lambda: _default_additional_store()),
+additional_stores=defaultdict(_default_additional_store),
),
description="Default JobStore to use when running locally or using FireWorks. "
"See the :obj:`JobflowSettings` docstring for more details on the "
@@ -137,7 +137,8 @@ def load_default_settings(cls, values):
if Path(config_file_path).exists():
if Path(config_file_path).stat().st_size == 0:
warnings.warn(
f"An empty JobFlow config file was located at {config_file_path}"
f"An empty JobFlow config file was located at {config_file_path}",
stacklevel=2,
)
else:
try:
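The `defaultdict` change in the first hunk above drops a redundant lambda: `defaultdict` accepts any zero-argument callable as its factory, so wrapping one in a lambda adds nothing. A standalone sketch, independent of jobflow:

```python
# defaultdict(factory) and defaultdict(lambda: factory()) behave identically;
# the direct form simply removes one layer of indirection.
from collections import defaultdict

def make_bucket():  # hypothetical zero-argument factory
    return []

d1 = defaultdict(lambda: make_bucket())
d2 = defaultdict(make_bucket)
d1["x"].append(1)
d2["x"].append(1)
assert d1["x"] == d2["x"]
```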
2 changes: 1 addition & 1 deletion src/jobflow/utils/enum.py
@@ -12,7 +12,7 @@ def __str__(self):

def __eq__(self, other):
"""Compare to another enum for equality."""
-if type(self) == type(other) and self.value == other.value:
+if type(self) is type(other) and self.value == other.value:
return True
return str(self.value) == str(other)

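The switch from `==` to `is` above is the usual fix for pycodestyle's E721 rule (also implemented by ruff), which asks for identity comparison in exact-type checks. A short illustration, independent of jobflow:

```python
# type(a) is type(b) is the idiomatic exact-type check: identity cannot be
# spoofed by a custom __eq__, and it never matches subclasses.
class Base:
    pass

class Sub(Base):
    pass

a, b = Base(), Sub()
print(type(a) is type(b))   # False: the exact same class is required
print(isinstance(b, Base))  # True: isinstance accepts subclasses as well
```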
2 changes: 1 addition & 1 deletion src/jobflow/utils/find.py
@@ -233,7 +233,7 @@ def get_root_locations(locations):
>>> _get_root_locations([["a", "b"], ["a"], ["c", "d"]])
[["a"], ["c", "d"]]
"""
-sorted_locs = sorted(locations, key=lambda x: len(x))
+sorted_locs = sorted(locations, key=len)
root_locations = []
for loc in sorted_locs:
if any(loc[: len(rloc)] == rloc for rloc in root_locations):
4 changes: 3 additions & 1 deletion src/jobflow/utils/graph.py
@@ -46,7 +46,9 @@ def itergraph(graph: nx.DiGraph):
subgraphs = [graph.subgraph(c) for c in nx.weakly_connected_components(graph)]

if len(subgraphs) > 1:
warnings.warn("Some jobs are not connected, their ordering may be random")
warnings.warn(
"Some jobs are not connected, their ordering may be random", stacklevel=2
)

for subgraph in subgraphs:
yield from nx.topological_sort(subgraph)
