astartes v1.1.0 🎉 (#136)

This PR includes some small cleanup tasks that have accumulated since the previous versions and will be included in the 1.1 release, which primarily features #122.
JacksonBurns · Jun 28, 2023 · 4e1b143 · 4e1b143
2 parents 13f5ba5 + c2e2846
commit 4e1b143
Show file tree

Hide file tree

Showing 8 changed files with 49 additions and 24 deletions.
diff --git a/.github/workflows/ipynb_ci.yml b/.github/workflows/ipynb_ci.yml
@@ -21,13 +21,15 @@ jobs:
     name: Check ${{ matrix.nb-file }} Notebook Execution
     steps:
       - uses: actions/checkout@v3
-      - uses: mamba-org/provision-with-micromamba@main
+      - uses: mamba-org/setup-micromamba@main
         with:
-          environment-file: false
           environment-name: temp
-          channels: defaults,conda-forge
-          channel-priority: flexible
-          extra-specs: |
+          condarc: |
+            channels:
+              - defaults
+              - conda-forge
+            channel_priority: flexible
+          create-args: |
             python=3.11
       - name: Install dependencies
         run: |

diff --git a/.github/workflows/reproduce_paper.yml b/.github/workflows/reproduce_paper.yml
@@ -25,13 +25,15 @@ jobs:
     name: Reproduce Paper Data Splits
     steps:
       - uses: actions/checkout@v3
-      - uses: mamba-org/provision-with-micromamba@main
+      - uses: mamba-org/setup-micromamba@main
         with:
-          environment-file: false
           environment-name: temp
-          channels: defaults,conda-forge
-          channel-priority: flexible
-          extra-specs: |
+          condarc: |
+            channels:
+              - defaults
+              - conda-forge
+            channel_priority: flexible
+          create-args: |
             python=3.11
       - name: Install Dependencies
         run: |

diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
@@ -28,13 +28,15 @@ jobs:
     name: ${{ matrix.os }} Python ${{ matrix.python-version }} Subtest
     steps:
       - uses: actions/checkout@v3
-      - uses: mamba-org/provision-with-micromamba@main
+      - uses: mamba-org/setup-micromamba@main
         with:
-          environment-file: false
           environment-name: temp
-          channels: defaults,conda-forge
-          channel-priority: flexible
-          extra-specs: |
+          condarc: |
+            channels:
+              - defaults
+              - conda-forge
+            channel_priority: flexible
+          create-args: |
             python=${{ matrix.python-version }}
       - name: Install Dependencies
         run: |

diff --git a/README.md b/README.md
@@ -126,15 +126,19 @@ Configuration options for the featurization scheme can be found in the documenta
 To that end, the default behavior of `astartes` is to use `42` as the random seed and _always_ set it.
 Running `astartes` with the default settings will always produce the exact same results.
 We have verified this behavior on Debian Ubuntu, Windows, and Intel Macs from Python versions 3.7 through 3.11 (with appropriate dependencies for each version).
-We are limited in our ability to test on M1 Macs, but from our limited manual testing we achieve perfect reproducbility in all cases _except occasionally_ with `KMeans` on Apple silicon. It has produced _slightly_ different results between platforms regardless of `random_state`, with up to two clusters being assigned differently resulting in data splits which are >99% identical. `astartes` is still consistent between runs on the same platform in all cases.
 
-## Evaluate the impact of splitting algorithms
+> **Note**
+> We are limited in our ability to test on M1 Macs, but from our limited manual testing we achieve perfect reproducibility in all cases _except occasionally_ with `KMeans` on Apple silicon.
+It has produced _slightly_ different results between platforms regardless of `random_state`, with up to two clusters being assigned differently resulting in data splits which are >99% identical.
+`astartes` is still consistent between runs on the same platform in all cases, and other samplers are not impacted by this apparent bug.
+
+## Evaluate the Impact of Splitting Algorithms
 The `generate_regression_results_dict` function allows users to quickly evaluate the impact of different splitting techniques on any model supported by `sklearn`. All results are stored in a dictionary format and can be displayed in a neatly formatted table using the optional `print_results` argument.
 
 ```
 from sklearn.svm import LinearSVR
 
-from astartes.utils.utils import generate_regression_results_dict
+from astartes.utils import generate_regression_results_dict
 
 sklearn_model = LinearSVR()
 results_dict = generate_regression_results_dict(

diff --git a/astartes/utils/__init__.py b/astartes/utils/__init__.py
@@ -0,0 +1,7 @@
+# import functions from this directory's contents so that users can import
+# them with `from astartes.utils import *`
+# internally, we do NOT do this to make the imports more explicit, i.e.
+# `from astartes.utils.exceptions import *`
+from .user_utils import generate_regression_results_dict
+
+__all__ = ["generate_regression_results_dict"]
diff --git a/astartes/utils/utils.py → astartes/utils/user_utils.py b/astartes/utils/utils.py → astartes/utils/user_utils.py
@@ -2,8 +2,7 @@
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 from tabulate import tabulate
 
-from astartes import train_val_test_split
-from astartes.utils.exceptions import InvalidModelTypeError
+import astartes
 
 
 def generate_regression_results_dict(
@@ -57,7 +56,9 @@ def generate_regression_results_dict(
             }
     """
     if not isinstance(sklearn_model, sklearn.base.BaseEstimator):
-        raise InvalidModelTypeError("Model must be an sklearn model")
+        raise astartes.utils.exceptions.InvalidModelTypeError(
+            "Model must be an sklearn model"
+        )
 
     final_dict = {}
     for sampler in samplers:
@@ -80,7 +81,14 @@ def generate_regression_results_dict(
         }
 
         # obtain indices
-        _, _, _, train_indices, val_indices, test_indices = train_val_test_split(
+        (
+            _,
+            _,
+            _,
+            train_indices,
+            val_indices,
+            test_indices,
+        ) = astartes.train_val_test_split(
             X,
             train_size=train_size,
             val_size=val_size,

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "astartes"
-version = "1.0.3"
+version = "1.1.0"
 authors = [
     { name = "Jackson Burns", email = "[email protected]" },
     { name = "Himaghna Bhattacharjee", email = "[email protected]" },

diff --git a/test/unit/utils/test_utils.py b/test/unit/utils/test_utils.py
@@ -4,8 +4,8 @@
 from sklearn.svm import LinearSVR
 
 from astartes.samplers.interpolation import Random
+from astartes.utils import generate_regression_results_dict
 from astartes.utils.exceptions import InvalidModelTypeError
-from astartes.utils.utils import generate_regression_results_dict
 
 
 class Test_utils(unittest.TestCase):