diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 84e3dee..46146d4 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11"] env: PYTHON: ${{ matrix.python-version }} @@ -38,12 +38,17 @@ jobs: pip install ".[dev]" - name: Run mypy - run: mypy . + run: mypy . - - name: Run black + - name: Lint with ruff run: | - black nrel tests --check + ruff check + + - name: ruff format check + run: | + ruff format --check - name: Python unit tests run: | - pytest tests/ -v \ No newline at end of file + python -m unittest discover tests/ + diff --git a/README.md b/README.md index f848761..5d0abf5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Routee Powertrain
- + PyPi Latest Release diff --git a/docs/model_training-ngboost.ipynb b/docs/model_training-ngboost.ipynb index 5f9b54c..bb77034 100644 --- a/docs/model_training-ngboost.ipynb +++ b/docs/model_training-ngboost.ipynb @@ -26,17 +26,10 @@ "import nrel.routee.powertrain as pt\n", "\n", "from nrel.routee.powertrain.trainers.ngboost_trainer import NGBoostTrainer\n", - "from nrel.routee.powertrain.trainers.sklearn_random_forest import SklearnRandomForestTrainer\n", "\n", - "from nrel.routee.powertrain.estimators.ngboost_estimator import NGBoostEstimator\n", "\n", "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import time\n", - "\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n" + "\n" ] }, { @@ -53,7 +46,6 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", "\n", "df = pd.read_csv(\"../tests/routee-powertrain-test-data/sample_train_data.csv\")" ] diff --git a/nrel/routee/powertrain/__about__.py b/nrel/routee/powertrain/__about__.py new file mode 100644 index 0000000..a955fda --- /dev/null +++ b/nrel/routee/powertrain/__about__.py @@ -0,0 +1 @@ +__version__ = "1.2.1" diff --git a/nrel/routee/powertrain/__init__.py b/nrel/routee/powertrain/__init__.py index 53b2115..027f8c2 100644 --- a/nrel/routee/powertrain/__init__.py +++ b/nrel/routee/powertrain/__init__.py @@ -1,6 +1,21 @@ import logging from pathlib import Path +__all__ = [ + "DataColumn", + "FeatureSet", + "Constraints", + "TargetSet", + "Model", + "ModelConfig", + "PowertrainType", + "list_available_models", + "load_model", + "load_sample_route", + "visualize_features", + "contour_plot", +] + from .core.features import DataColumn, FeatureSet, Constraints, TargetSet from .core.model import Model from .core.model_config import ModelConfig @@ -8,8 +23,6 @@ from .io.load import list_available_models, load_model, load_sample_route from 
.validation.feature_visualization import visualize_features, contour_plot -__version__ = "1.0.0" - log = logging.getLogger() log.setLevel(logging.INFO) diff --git a/nrel/routee/powertrain/core/metadata.py b/nrel/routee/powertrain/core/metadata.py index d327022..f2e8367 100644 --- a/nrel/routee/powertrain/core/metadata.py +++ b/nrel/routee/powertrain/core/metadata.py @@ -33,7 +33,11 @@ def to_json(self) -> str: @classmethod def from_dict(cls, d: dict) -> Metadata: v = get_version() - if d["routee_version"] != v: + major_v = v.split(".")[0] + + incoming_v = d["routee_version"] + incoming_major_v = incoming_v.split(".")[0] + if incoming_major_v != major_v: warnings.warn( "this model was trained using routee-powertrain version " f"{d['routee_version']} but you're using version {v}" diff --git a/nrel/routee/powertrain/core/model.py b/nrel/routee/powertrain/core/model.py index 791963a..f058d9e 100644 --- a/nrel/routee/powertrain/core/model.py +++ b/nrel/routee/powertrain/core/model.py @@ -411,7 +411,7 @@ def __repr__(self) -> str: for feature in feature_set.features: summary_lines.append(f"Feature: {feature.name} ({feature.units})") summary_lines.append( - f"Distance: {config.distance.name} " f"({config.distance.units})" + f"Distance: {config.distance.name} ({config.distance.units})" ) for target in config.target.targets: summary_lines.append(f"Target: {target.name} ({target.units})") diff --git a/nrel/routee/powertrain/core/model_config.py b/nrel/routee/powertrain/core/model_config.py index ffdc7ac..489bf10 100644 --- a/nrel/routee/powertrain/core/model_config.py +++ b/nrel/routee/powertrain/core/model_config.py @@ -95,8 +95,9 @@ def __post_init__(self): feature_set_ids = [f.features_id for f in self.feature_sets] if len(feature_set_ids) != len(set(feature_set_ids)): raise ValueError( - "Feature sets must have unique ids. " - "Found duplicate ids: {}".format(feature_set_ids) + "Feature sets must have unique ids. 
Found duplicate ids: {}".format( + feature_set_ids + ) ) # now check all the types diff --git a/nrel/routee/powertrain/estimators/ngboost_estimator.py b/nrel/routee/powertrain/estimators/ngboost_estimator.py index 9504a5f..dd3655e 100644 --- a/nrel/routee/powertrain/estimators/ngboost_estimator.py +++ b/nrel/routee/powertrain/estimators/ngboost_estimator.py @@ -1,13 +1,12 @@ from __future__ import annotations -# from abc import ABC, abstractmethod from pathlib import Path -import joblib import base64 import io import json import pandas as pd -from ngboost import NGBRegressor + +from importlib.util import find_spec from nrel.routee.powertrain.core.features import DataColumn, FeatureSet, TargetSet from nrel.routee.powertrain.core.model_config import PredictMethod @@ -15,7 +14,6 @@ class NGBoostEstimator(Estimator): - def __init__(self, ngboost) -> None: self.model = ngboost @@ -41,10 +39,23 @@ def to_file(self, filepath: str | Path): json.dump(self.to_dict(), f) @classmethod - def from_dict(cls, in_dict: dict) -> "NGBRegressor": + def from_dict(cls, in_dict: dict) -> NGBoostEstimator: """ Load an estimator from a bytes object in memory """ + if find_spec("ngboost") is None: + raise ImportError( + "The NGBoostEstimator estimator requires extra dependencies like joblib and ngboost. " + "To install, you can do pip install nrel.routee.powertrain[ngboost]" + ) + + if find_spec("joblib") is None: + raise ImportError( + "The NGBoostEstimator estimator requires extra dependencies like joblib and ngboost. " + "To install, you can do pip install nrel.routee.powertrain[ngboost]" + ) + else: + import joblib model_base64 = in_dict.get("ngboost_model") @@ -60,6 +71,13 @@ def to_dict(self) -> dict: """ Serialize an estimator to a python dictionary """ + try: + import joblib + except ImportError: + raise ImportError( + "The NGBoostEstimator estimator requires extra dependencies like joblib and ngboost. 
" + "To install, you can do pip install nrel.routee.powertrain[ngboost]" + ) byte_stream = io.BytesIO() joblib.dump(self.model, byte_stream) byte_stream.seek(0) diff --git a/nrel/routee/powertrain/estimators/sklearn/__init__.py b/nrel/routee/powertrain/estimators/sklearn/__init__.py index dab1975..a5bb962 100644 --- a/nrel/routee/powertrain/estimators/sklearn/__init__.py +++ b/nrel/routee/powertrain/estimators/sklearn/__init__.py @@ -1 +1,3 @@ from .estimator import SKLearnEstimator + +__all__ = ["SKLearnEstimator"] diff --git a/nrel/routee/powertrain/trainers/ngboost_trainer.py b/nrel/routee/powertrain/trainers/ngboost_trainer.py index 3edd58c..41b3cb4 100644 --- a/nrel/routee/powertrain/trainers/ngboost_trainer.py +++ b/nrel/routee/powertrain/trainers/ngboost_trainer.py @@ -1,8 +1,7 @@ -from enum import Enum import pandas as pd from ngboost import NGBRegressor -from ngboost.distns import Exponential, Normal +from ngboost.distns import Normal from nrel.routee.powertrain.core.model_config import ModelConfig @@ -21,7 +20,6 @@ def __init__( learning_rate: float = 0.01, random_state: int = 52, ): - self.n_estimators = n_estimators self.dist = dist self.verbose = verbose diff --git a/nrel/routee/powertrain/utils/fs.py b/nrel/routee/powertrain/utils/fs.py index af06f49..9fea999 100644 --- a/nrel/routee/powertrain/utils/fs.py +++ b/nrel/routee/powertrain/utils/fs.py @@ -13,7 +13,7 @@ def read(path: Path): return fp.read() -def get_version(path: Path = root() / "__init__.py"): +def get_version(path: Path = root() / "__about__.py"): with path.open("r") as fp: for line in fp.readlines(): if line.startswith("__version__"): diff --git a/nrel/routee/powertrain/validation/errors.py b/nrel/routee/powertrain/validation/errors.py index 62af19c..66e7d71 100644 --- a/nrel/routee/powertrain/validation/errors.py +++ b/nrel/routee/powertrain/validation/errors.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd -from scipy.stats import norm from 
nrel.routee.powertrain.core.features import FeatureSetId from nrel.routee.powertrain.core.model_config import ModelConfig @@ -85,6 +84,13 @@ def calculate_nll(target, target_pred, target_std) -> float: """ Calculate Negative Log-Likelihood (NLL). """ + try: + from scipy.stats import norm + except ImportError: + raise ImportError( + "The calculate_nll function requires scipy. " + "To install, you can do pip install scipy" + ) nll = -np.mean(norm.logpdf(target, loc=target_pred, scale=target_std)) return nll @@ -93,6 +99,13 @@ def calculate_crps(target, target_pred, target_std) -> float: """ Calculate Continuous Ranked Probability Score (CRPS). """ + try: + from scipy.stats import norm + except ImportError: + raise ImportError( + "The calculate_nll function requires scipy. " + "To install, you can do pip install scipy" + ) # CDF of the predicted distribution z = (target - target_pred) / target_std crps = target_std * ( @@ -344,6 +357,13 @@ def compute_errors( ) if isinstance(estimator, NGBoostEstimator): + try: + from scipy.stats import norm + except ImportError: + raise ImportError( + "The errors for the NGBoostEstimator requires other dependnecies like scipy. " + "To install, you can do `pip install nrel.routee.powertrain[ngboost]" + ) target_std = np.array(predictions[energy_name + "_std"]) alpha = 0.05 z = norm.ppf(1 - alpha / 2) # z-score for 95% confidence diff --git a/pyproject.toml b/pyproject.toml index 11e3f84..813e592 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,10 @@ [build-system] -requires = ["setuptools>=63.0.0", "wheel"] -build-backend = "setuptools.build_meta" +requires = ["hatchling"] +build-backend = "hatchling.build" [project] name = "nrel.routee.powertrain" -version = "1.2.0" +dynamic = ["version"] description = "RouteE-Powertrain is a tool for predicting energy usage over a set of road links." 
readme = "README.md" authors = [{ name = "National Renewable Energy Laboratory" }] @@ -18,11 +18,11 @@ classifiers = [ "Topic :: Scientific/Engineering", ] dependencies = ["pandas", "numpy", "onnx", "onnxruntime==1.18.1"] -requires-python = ">=3.8" +requires-python = ">=3.9" [project.optional-dependencies] -scikit = ["scikit-learn", "skl2onnx"] -ngboost = ["ngboost"] +scikit = ["scikit-learn==1.2.2", "skl2onnx"] +ngboost = ["ngboost==0.5.2", "scikit-learn==1.2.2"] plot = ["matplotlib"] dev = [ "nrel.routee.powertrain[scikit]", @@ -30,10 +30,10 @@ dev = [ "nrel.routee.powertrain[ngboost]", "tqdm", "pytest", - "black", "mypy", "maturin", "ruff", + "hatch", "shapely", "boxsdk", "jupyter-book", @@ -49,20 +49,70 @@ dev = [ [project.urls] Homepage = "https://github.com/NREL/routee-powertrain" -[tool.setuptools.packages.find] -where = ["."] # list of folders that contain the packages (["."] by default) -include = [ - "nrel*", -] # package names should match these glob patterns (["*"] by default) +[tool.hatch.version] +path = "nrel/routee/powertrain/__about__.py" -[tool.setuptools.package-data] -"nrel.routee.powertrain" = ["py.typed"] +[tool.hatch.build.targets.sdist] +exclude = ["scripts/", "tests/", "docs/"] -[tool.ruff.per-file-ignores] -"__init__.py" = ["F401"] +[tool.hatch.build.targets.wheel] +packages = ["nrel/routee/powertrain"] [tool.mypy] ignore_missing_imports = true namespace_packages = true explicit_package_bases = true exclude = ["docs/", "build/", "dist/"] + +[tool.ruff] +include = ["nrel/**/*.py", "tests/*.py"] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +# Same as Black. 
+line-length = 88 +indent-width = 4 + +# Assume Python 3.9 +target-version = "py39" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix` is provided). +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"