Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include GitHub Actions #98

Merged
merged 23 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/workflows/linting.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: code-style

on:
push:
branches: 'main'
pull_request:
branches: '*'

jobs:
linting:
name: 'pre-commit hooks'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
      - uses: pre-commit/action@v3.0.1
38 changes: 38 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: debug-statements
- id: check-docstring-first
- id: check-json

- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black-jupyter

- repo: https://github.com/asottile/reorder-python-imports
rev: v3.12.0
hooks:
- id: reorder-python-imports
args: [--py38-plus, --add-import, 'from __future__ import annotations']

- repo: https://github.com/asottile/add-trailing-comma
rev: v3.1.0
hooks:
- id: add-trailing-comma

- repo: https://github.com/asottile/pyupgrade
rev: v3.15.2
hooks:
- id: pyupgrade
args: [--py38-plus]

- repo: https://github.com/PyCQA/flake8
rev: 7.0.0
hooks:
- id: flake8
args: [--max-line-length=120]
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ or you can view `${CUPID_ROOT}/examples/coupled-model/computed_notebooks/quick-r
Furthermore, to clear the `computed_notebooks` folder which was generated by the `cupid-run` and `cupid-build` commands, you can run the following command:

``` bash
$ cupid-clear
$ cupid-clear
```

This will clear the `computed_notebooks` folder which is at the location pointed to by the `run_dir` variable in the `config.yml` file.
This will clear the `computed_notebooks` folder which is at the location pointed to by the `run_dir` variable in the `config.yml` file.

### CUPiD Options

Expand Down
17 changes: 9 additions & 8 deletions cupid/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,20 @@

The main function `build()` reads the configuration file (default config.yml),
extracts the necessary information such as the name of the book and the
directory containing computed notebooks, and then proceeds to clean and build the
Jupyter book using the `jupyter-book` command-line tool.
directory containing computed notebooks, and then proceeds to clean and build
the Jupyter book using the `jupyter-book` command-line tool.

Args:
CONFIG_PATH: str, path to configuration file (default config.yml)

Returns:
None
"""
from __future__ import annotations

import click
import subprocess
import sys

import click
import yaml


Expand All @@ -34,22 +35,22 @@ def build(config_path):
None
"""

with open(config_path, "r") as fid:
with open(config_path) as fid:
control = yaml.safe_load(fid)

sname = control["data_sources"]["sname"]
run_dir = control["data_sources"]["run_dir"]

subprocess.run(["jupyter-book", "clean", f"{run_dir}/computed_notebooks/{sname}"])
subprocess.run(
["jupyter-book", "build", f"{run_dir}/computed_notebooks/{sname}", "--all"]
["jupyter-book", "build", f"{run_dir}/computed_notebooks/{sname}", "--all"],
)

# Originally used this code to copy jupyter book HTML to a location to host it online

# if 'publish_location' in control:
# if "publish_location" in control:

# user = os.environ.get('USER')
# user = os.environ.get("USER")
# remote_mach = control["publish_location"]["remote_mach"]
# remote_dir = control["publish_location"]["remote_dir"]
# this seems more complicated than expected...people have mentioned paramiko library?
Expand Down
21 changes: 12 additions & 9 deletions cupid/clear.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
#!/usr/bin/env python
"""
This script provides functionality to clear the contents of the 'computed_notebooks' folder
at the location specified by the 'run_dir' variable in the CONFIG_PATH.
This script provides functionality to clear the contents of the "computed_notebooks" folder
at the location specified by the "run_dir" variable in the CONFIG_PATH.

The main function `clear()` takes the path to the configuration file as input, reads the config file
to obtain the 'run_dir' variable, and then deletes the contents of the 'computed_notebooks' folder
to obtain the "run_dir" variable, and then deletes the contents of the "computed_notebooks" folder
at that location.

"""
from __future__ import annotations

import os
import shutil

import click

import cupid.util


def read_config_file(config_path):
"""
Given the file path to the configuration file, this function reads the config file content and
returns the val of the run_dir string with '/computed_notebooks' appended to it
returns the val of the run_dir string with `/computed_notebooks` appended to it

Args:
CONFIG_PATH: str, path to configuration file (default config.yml)
Expand All @@ -31,26 +34,26 @@ def read_config_file(config_path):
run_dir = control["data_sources"].get("run_dir", None)

if run_dir:
# Append '/computed_notebooks' to the run_dir value if it is not empty
# Append `/computed_notebooks` to the run_dir value if it is not empty
full_path = os.path.join(run_dir, "computed_notebooks")
return full_path

# else run_dir is empty/wasn't found in config file so return error
# else run_dir is empty/was not found in config file so return error
raise ValueError("'run_dir' was empty/not found in the config file.")


@click.command()
@click.argument("config_path", default="config.yml")
# Entry point to this script
def clear(config_path):
"""Clears the contents of the 'computed_notebooks' folder at the location
specified by the 'run_dir' variable in the CONFIG_PATH.
"""Clears the contents of the "computed_notebooks" folder at the location
specified by the "run_dir" variable in the CONFIG_PATH.

Args: CONFIG_PATH - The path to the configuration file.

"""

run_dir = read_config_file(config_path)
# Delete the 'computed_notebooks' folder and all the contents inside of it
# Delete the "computed_notebooks" folder and all the contents inside of it
shutil.rmtree(run_dir)
print(f"All contents in {run_dir} have been cleared.")
7 changes: 4 additions & 3 deletions cupid/quickstart.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
### To be created: a script, maybe called through a command line entry point,
### that sets up a directory with a config.yml file and
### basics necessary to set up a notebook collection
# To be created: a script, maybe called through a command line entry point,
# that sets up a directory with a config.yml file and
# basics necessary to set up a notebook collection
from __future__ import annotations
14 changes: 8 additions & 6 deletions cupid/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- get_collection(path_to_catalog, **kwargs): Get a collection of datasets from an
intake catalog based on specified criteria.
"""
from __future__ import annotations

import intake
import yaml
Expand All @@ -21,21 +22,22 @@ def read_yaml(path_to_yaml):
def get_collection(path_to_catalog, **kwargs):
"""Get collection of datasets from intake catalog"""
cat = intake.open_esm_datastore(path_to_catalog)
### note that the json file points to the csv, so the path that the
### yaml file contains doesn't actually get used. this can cause issues
# note that the json file points to the csv, so the path that the
# yaml file contains does not actually get used. this can cause issues

cat_subset = cat.search(**kwargs)

if "variable" in kwargs.keys():
# pylint: disable=invalid-name
def preprocess(ds):
## the double brackets return a Dataset rather than a DataArray
## this is fragile and could cause issues, not sure what subsetting on time_bound does
# the double brackets return a Dataset rather than a DataArray
# this is fragile and could cause issues, not sure what subsetting on time_bound does
return ds[[kwargs["variable"], "time_bound"]]

## not sure what the chunking kwarg is doing here either
# not sure what the chunking kwarg is doing here either
dsets = cat_subset.to_dataset_dict(
xarray_open_kwargs={"chunks": {"time": -1}}, preprocess=preprocess
xarray_open_kwargs={"chunks": {"time": -1}},
preprocess=preprocess,
)

else:
Expand Down
28 changes: 16 additions & 12 deletions cupid/run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python

"""
Main script for running all notebooks and scripts specified in the configuration file.

Expand All @@ -21,19 +20,24 @@
-config_path Path to the YAML configuration file containing specifications for notebooks (default: config.yml)
-h, --help Show this message and exit.
"""
from __future__ import annotations

import os
import warnings

import click
import intake
import ploomber
import cupid.util

import cupid.timeseries
import cupid.util

CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])

# fmt: off
# pylint: disable=line-too-long


@click.command(context_settings=CONTEXT_SETTINGS)
@click.option("--serial", "-s", is_flag=True, help="Do not use LocalCluster objects")
@click.option("--time-series", "-ts", is_flag=True, help="Run time series generation scripts prior to diagnostics")
Expand Down Expand Up @@ -135,7 +139,7 @@ def run(
output_dir = run_dir + "/computed_notebooks/" + control["data_sources"]["sname"]
temp_data_path = run_dir + "/temp_data"
nb_path_root = os.path.realpath(
os.path.expanduser(control["data_sources"]["nb_path_root"])
os.path.expanduser(control["data_sources"]["nb_path_root"]),
)

#####################################################################
Expand All @@ -147,7 +151,7 @@ def run(

if "path_to_cat_json" in control["data_sources"]:
full_cat_path = os.path.realpath(
os.path.expanduser(control["data_sources"]["path_to_cat_json"])
os.path.expanduser(control["data_sources"]["path_to_cat_json"]),
)
full_cat = intake.open_esm_datastore(full_cat_path)

Expand All @@ -159,7 +163,7 @@ def run(
# This pulls out the name of the catalog from the path
cat_subset_name = full_cat_path.split("/")[-1].split(".")[0] + "_subset"
cat_subset.serialize(
directory=temp_data_path, name=cat_subset_name, catalog_type="file"
directory=temp_data_path, name=cat_subset_name, catalog_type="file",
)
cat_path = temp_data_path + "/" + cat_subset_name + ".json"
else:
Expand Down Expand Up @@ -191,7 +195,7 @@ def run(
all_nbs[nb]["output_dir"] = output_dir + "/" + comp_name
elif comp_bool and not all:
warnings.warn(
f"No notebooks for {comp_name} component specified in config file."
f"No notebooks for {comp_name} component specified in config file.",
)

# Checking for existence of environments
Expand All @@ -200,9 +204,9 @@ def run(
if not control["env_check"][info["kernel_name"]]:
bad_env = info["kernel_name"]
warnings.warn(
f"Environment {bad_env} specified for {nb}.ipynb could not be found;"+
f" {nb}.ipynb will not be run."+
f"See README.md for environment installation instructions."
f"Environment {bad_env} specified for {nb}.ipynb could not be found;" +
f" {nb}.ipynb will not be run." +
"See README.md for environment installation instructions.",
)
all_nbs.pop(nb)

Expand Down Expand Up @@ -234,7 +238,7 @@ def run(
all_scripts[script]["nb_path_root"] = nb_path_root + "/" + comp_name
elif comp_bool and not all:
warnings.warn(
f"No scripts for {comp_name} component specified in config file."
f"No scripts for {comp_name} component specified in config file.",
)

# Checking for existence of environments
Expand All @@ -243,8 +247,8 @@ def run(
if not control["env_check"][info["kernel_name"]]:
bad_env = info["kernel_name"]
warnings.warn(
f"Environment {bad_env} specified for {script}.py could not be found;"+
f"{script}.py will not be run."
f"Environment {bad_env} specified for {script}.py could not be found;" +
f"{script}.py will not be run.",
)
all_scripts.pop(script)

Expand Down
Loading
Loading