Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include GitHub Actions #98

Merged
merged 23 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/workflows/linting.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: code-style

on:
push:
branches: 'main'
pull_request:
branches: '*'

jobs:
linting:
name: 'pre-commit hooks'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
      - uses: pre-commit/action@v3.0.1
38 changes: 38 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: debug-statements
- id: check-docstring-first
- id: check-json

- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black-jupyter

- repo: https://github.com/asottile/reorder-python-imports
rev: v3.12.0
hooks:
- id: reorder-python-imports
args: [--py38-plus, --add-import, 'from __future__ import annotations']

- repo: https://github.com/asottile/add-trailing-comma
rev: v3.1.0
hooks:
- id: add-trailing-comma

- repo: https://github.com/asottile/pyupgrade
rev: v3.15.2
hooks:
- id: pyupgrade
args: [--py38-plus]

- repo: https://github.com/PyCQA/flake8
rev: 7.0.0
hooks:
- id: flake8
args: [--max-line-length=120]
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ or you can view `${CUPID_ROOT}/examples/coupled-model/computed_notebooks/quick-r
Furthermore, to clear the `computed_notebooks` folder which was generated by the `cupid-run` and `cupid-build` commands, you can run the following command:

``` bash
$ cupid-clear
$ cupid-clear
```

This will clear the `computed_notebooks` folder which is at the location pointed to by the `run_dir` variable in the `config.yml` file.
This will clear the `computed_notebooks` folder which is at the location pointed to by the `run_dir` variable in the `config.yml` file.

### CUPiD Options

Expand Down
17 changes: 9 additions & 8 deletions cupid/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,20 @@

The main function `build()` reads the configuration file (default config.yml),
extracts the necessary information such as the name of the book and the
directory containing computed notebooks, and then proceeds to clean and build the
Jupyter book using the `jupyter-book` command-line tool.
directory containing computed notebooks, and then proceeds to clean and build
the Jupyter book using the `jupyter-book` command-line tool.

Args:
CONFIG_PATH: str, path to configuration file (default config.yml)

Returns:
None
"""
from __future__ import annotations

import click
import subprocess
import sys

import click
import yaml


Expand All @@ -34,22 +35,22 @@ def build(config_path):
None
"""

with open(config_path, "r") as fid:
with open(config_path) as fid:
control = yaml.safe_load(fid)

sname = control["data_sources"]["sname"]
run_dir = control["data_sources"]["run_dir"]

subprocess.run(["jupyter-book", "clean", f"{run_dir}/computed_notebooks/{sname}"])
subprocess.run(
["jupyter-book", "build", f"{run_dir}/computed_notebooks/{sname}", "--all"]
["jupyter-book", "build", f"{run_dir}/computed_notebooks/{sname}", "--all"],
)

# Originally used this code to copy jupyter book HTML to a location to host it online

# if 'publish_location' in control:
# if "publish_location" in control:

# user = os.environ.get('USER')
# user = os.environ.get("USER")
# remote_mach = control["publish_location"]["remote_mach"]
# remote_dir = control["publish_location"]["remote_dir"]
# this seems more complicated than expected...people have mentioned paramiko library?
Expand Down
21 changes: 12 additions & 9 deletions cupid/clear.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
#!/usr/bin/env python
"""
This script provides functionality to clear the contents of the 'computed_notebooks' folder
at the location specified by the 'run_dir' variable in the CONFIG_PATH.
This script provides functionality to clear the contents of the "computed_notebooks" folder
at the location specified by the "run_dir" variable in the CONFIG_PATH.

The main function `clear()` takes the path to the configuration file as input, reads the config file
to obtain the 'run_dir' variable, and then deletes the contents of the 'computed_notebooks' folder
to obtain the "run_dir" variable, and then deletes the contents of the "computed_notebooks" folder
at that location.

"""
from __future__ import annotations

import os
import shutil

import click

import cupid.util


def read_config_file(config_path):
"""
Given the file path to the configuration file, this function reads the config file content and
returns the val of the run_dir string with '/computed_notebooks' appended to it
returns the val of the run_dir string with `/computed_notebooks` appended to it

Args:
CONFIG_PATH: str, path to configuration file (default config.yml)
Expand All @@ -31,26 +34,26 @@ def read_config_file(config_path):
run_dir = control["data_sources"].get("run_dir", None)

if run_dir:
# Append '/computed_notebooks' to the run_dir value if it is not empty
# Append `/computed_notebooks` to the run_dir value if it is not empty
full_path = os.path.join(run_dir, "computed_notebooks")
return full_path

# else run_dir is empty/wasn't found in config file so return error
# else run_dir is empty/was not found in config file so return error
raise ValueError("'run_dir' was empty/not found in the config file.")


@click.command()
@click.argument("config_path", default="config.yml")
# Entry point to this script
def clear(config_path):
"""Clears the contents of the 'computed_notebooks' folder at the location
specified by the 'run_dir' variable in the CONFIG_PATH.
"""Clears the contents of the "computed_notebooks" folder at the location
specified by the "run_dir" variable in the CONFIG_PATH.

Args: CONFIG_PATH - The path to the configuration file.

"""

run_dir = read_config_file(config_path)
# Delete the 'computed_notebooks' folder and all the contents inside of it
# Delete the "computed_notebooks" folder and all the contents inside of it
shutil.rmtree(run_dir)
print(f"All contents in {run_dir} have been cleared.")
7 changes: 4 additions & 3 deletions cupid/quickstart.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
### To be created: a script, maybe called through a command line entry point,
### that sets up a directory with a config.yml file and
### basics necessary to set up a notebook collection
# To be created: a script, maybe called through a command line entry point,
# that sets up a directory with a config.yml file and
# basics necessary to set up a notebook collection
from __future__ import annotations
14 changes: 8 additions & 6 deletions cupid/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- get_collection(path_to_catalog, **kwargs): Get a collection of datasets from an
intake catalog based on specified criteria.
"""
from __future__ import annotations

import intake
import yaml
Expand All @@ -21,21 +22,22 @@ def read_yaml(path_to_yaml):
def get_collection(path_to_catalog, **kwargs):
"""Get collection of datasets from intake catalog"""
cat = intake.open_esm_datastore(path_to_catalog)
### note that the json file points to the csv, so the path that the
### yaml file contains doesn't actually get used. this can cause issues
# note that the json file points to the csv, so the path that the
# yaml file contains does not actually get used. this can cause issues

cat_subset = cat.search(**kwargs)

if "variable" in kwargs.keys():
# pylint: disable=invalid-name
def preprocess(ds):
## the double brackets return a Dataset rather than a DataArray
## this is fragile and could cause issues, not sure what subsetting on time_bound does
# the double brackets return a Dataset rather than a DataArray
# this is fragile and could cause issues, not sure what subsetting on time_bound does
return ds[[kwargs["variable"], "time_bound"]]

## not sure what the chunking kwarg is doing here either
# not sure what the chunking kwarg is doing here either
dsets = cat_subset.to_dataset_dict(
xarray_open_kwargs={"chunks": {"time": -1}}, preprocess=preprocess
xarray_open_kwargs={"chunks": {"time": -1}},
preprocess=preprocess,
)

else:
Expand Down
28 changes: 16 additions & 12 deletions cupid/run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python

"""
Main script for running all notebooks and scripts specified in the configuration file.

Expand All @@ -21,19 +20,24 @@
-config_path Path to the YAML configuration file containing specifications for notebooks (default: config.yml)
-h, --help Show this message and exit.
"""
from __future__ import annotations

import os
import warnings

import click
import intake
import ploomber
import cupid.util

import cupid.timeseries
import cupid.util

CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])

# fmt: off
# pylint: disable=line-too-long


@click.command(context_settings=CONTEXT_SETTINGS)
@click.option("--serial", "-s", is_flag=True, help="Do not use LocalCluster objects")
@click.option("--time-series", "-ts", is_flag=True, help="Run time series generation scripts prior to diagnostics")
Expand Down Expand Up @@ -135,7 +139,7 @@ def run(
output_dir = run_dir + "/computed_notebooks/" + control["data_sources"]["sname"]
temp_data_path = run_dir + "/temp_data"
nb_path_root = os.path.realpath(
os.path.expanduser(control["data_sources"]["nb_path_root"])
os.path.expanduser(control["data_sources"]["nb_path_root"]),
)

#####################################################################
Expand All @@ -147,7 +151,7 @@ def run(

if "path_to_cat_json" in control["data_sources"]:
full_cat_path = os.path.realpath(
os.path.expanduser(control["data_sources"]["path_to_cat_json"])
os.path.expanduser(control["data_sources"]["path_to_cat_json"]),
)
full_cat = intake.open_esm_datastore(full_cat_path)

Expand All @@ -159,7 +163,7 @@ def run(
# This pulls out the name of the catalog from the path
cat_subset_name = full_cat_path.split("/")[-1].split(".")[0] + "_subset"
cat_subset.serialize(
directory=temp_data_path, name=cat_subset_name, catalog_type="file"
directory=temp_data_path, name=cat_subset_name, catalog_type="file",
)
cat_path = temp_data_path + "/" + cat_subset_name + ".json"
else:
Expand Down Expand Up @@ -191,7 +195,7 @@ def run(
all_nbs[nb]["output_dir"] = output_dir + "/" + comp_name
elif comp_bool and not all:
warnings.warn(
f"No notebooks for {comp_name} component specified in config file."
f"No notebooks for {comp_name} component specified in config file.",
)

# Checking for existence of environments
Expand All @@ -200,9 +204,9 @@ def run(
if not control["env_check"][info["kernel_name"]]:
bad_env = info["kernel_name"]
warnings.warn(
f"Environment {bad_env} specified for {nb}.ipynb could not be found;"+
f" {nb}.ipynb will not be run."+
f"See README.md for environment installation instructions."
f"Environment {bad_env} specified for {nb}.ipynb could not be found;" +
f" {nb}.ipynb will not be run." +
"See README.md for environment installation instructions.",
)
all_nbs.pop(nb)

Expand Down Expand Up @@ -234,7 +238,7 @@ def run(
all_scripts[script]["nb_path_root"] = nb_path_root + "/" + comp_name
elif comp_bool and not all:
warnings.warn(
f"No scripts for {comp_name} component specified in config file."
f"No scripts for {comp_name} component specified in config file.",
)

# Checking for existence of environments
Expand All @@ -243,8 +247,8 @@ def run(
if not control["env_check"][info["kernel_name"]]:
bad_env = info["kernel_name"]
warnings.warn(
f"Environment {bad_env} specified for {script}.py could not be found;"+
f"{script}.py will not be run."
f"Environment {bad_env} specified for {script}.py could not be found;" +
f"{script}.py will not be run.",
)
all_scripts.pop(script)

Expand Down
Loading
Loading