Commit
rename sparse into sparse_output
Signed-off-by: Xavier Dupre <[email protected]>
xadupre committed Jan 23, 2024
1 parent cb28075 commit 5edf1cf
Showing 12 changed files with 87 additions and 169 deletions.
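
The rename follows scikit-learn's own API change: the sparse argument of OneHotEncoder was deprecated in favour of sparse_output in scikit-learn 1.2 and removed in 1.4. A minimal sketch of the renamed call, shown here only for context and not part of this commit's diff:

# Illustrative sketch only: the encoder behaves the same, only the keyword changes.
import numpy
from sklearn.preprocessing import OneHotEncoder

X = numpy.array([["a"], ["b"], ["a"]])

# scikit-learn >= 1.2 (the old spelling is removed in 1.4)
enc = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
print(enc.fit_transform(X))

# scikit-learn < 1.2 used the old keyword:
# enc = OneHotEncoder(sparse=False, handle_unknown="ignore")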
2 changes: 1 addition & 1 deletion .azure-pipelines/linux-conda-CI.yml
@@ -40,7 +40,7 @@ jobs:
pandas.version: ''
lgbm.version: ''
onnxcc.version: '>=1.8.1'
run.example: '1'
run.example: '0'

Py311-Onnx141-Rt160-Skl131:
do.bench: '0'
2 changes: 1 addition & 1 deletion docs/pipeline.rst
@@ -41,7 +41,7 @@ useful to build complex pipelines such as the following one:
])

categorical_transformer = Pipeline(steps=[
('onehot', OneHotEncoder(sparse=True, handle_unknown='ignore')),
('onehot', OneHotEncoder(sparse_output=True, handle_unknown='ignore')),
('tsvd', TruncatedSVD(n_components=1, algorithm='arpack', tol=1e-4))
])

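Once the keyword is updated, such a pipeline converts to ONNX as before. A minimal sketch with made-up data (the names and values below are illustrative, not taken from the documentation page):

# Illustrative sketch: convert a small pipeline using sparse_output with skl2onnx.
import numpy
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from skl2onnx import to_onnx

X = numpy.array([[0.0, 1.0], [1.0, 2.0], [2.0, 0.0], [1.0, 1.0]], dtype=numpy.float32)
pipe = Pipeline(steps=[
    ("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore")),
    ("tsvd", TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4)),
])
pipe.fit(X)

# to_onnx infers the input type from the sample array passed in.
onx = to_onnx(pipe, X[:1])
print([i.name for i in onx.graph.input])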
132 changes: 71 additions & 61 deletions docs/tests/test_documentation_examples.py
@@ -1,15 +1,15 @@
# SPDX-License-Identifier: Apache-2.0

"""
Tests examples from the documentation.
"""
import unittest
import packaging.version as pv
import os
import sys
import importlib
import subprocess
import onnxruntime
import time
from skl2onnx import __file__ as onnxrt_backend_dev_file

VERBOSE = 0
ROOT = os.path.realpath(os.path.abspath(os.path.join(onnxrt_backend_dev_file, "..")))


def import_source(module_file_path, module_name):
@@ -24,64 +24,74 @@ def import_source(module_file_path, module_name):
return module_spec.loader.exec_module(module)


class TestDocumentationExample(unittest.TestCase):
def test_documentation_examples(self):
class TestDocumentationExamples(unittest.TestCase):
def run_test(self, fold: str, name: str, verbose=0) -> int:
ppath = os.environ.get("PYTHONPATH", "")
if not ppath:
os.environ["PYTHONPATH"] = ROOT
elif ROOT not in ppath:
sep = ";" if sys.platform == "win32" else ":"
os.environ["PYTHONPATH"] = ppath + sep + ROOT
perf = time.perf_counter()
try:
mod = import_source(fold, os.path.splitext(name)[0])
assert mod is not None
except FileNotFoundError:
# try another way
cmds = [sys.executable, "-u", os.path.join(fold, name)]
p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
res = p.communicate()
out, err = res
st = err.decode("ascii", errors="ignore")
if st and "Traceback" in st:
if '"dot" not found in path.' in st:
# dot not installed, this part
# is tested in onnx framework
if verbose:
print(f"failed: {name!r} due to missing dot.")
return 0
raise AssertionError(
"Example '{}' (cmd: {} - exec_prefix='{}') "
"failed due to\n{}"
"".format(name, cmds, sys.exec_prefix, st)
)
dt = time.perf_counter() - perf
if verbose:
print(f"{dt:.3f}: run {name!r}")
return 1

@classmethod
def add_test_methods(cls):
this = os.path.abspath(os.path.dirname(__file__))
fold = os.path.normpath(os.path.join(this, "..", "examples"))
found = os.listdir(fold)
tested = 0
for name in found:
if name.startswith("plot_") and name.endswith(".py"):
if name == "plot_pipeline_lightgbm.py" and pv.Version(
onnxruntime.__version__
) < pv.Version("1.0.0"):
folds = [
os.path.normpath(os.path.join(this, "..", "docs", "examples")),
os.path.normpath(os.path.join(this, "..", "docs", "tutorial")),
]
for fold in folds:
found = os.listdir(fold)
for name in found:
if not name.endswith(".py") or not name.startswith("plot_"):
continue
print("run %r" % name)
try:
mod = import_source(fold, os.path.splitext(name)[0])
assert mod is not None
except FileNotFoundError:
# try another way
cmds = [sys.executable, "-u", os.path.join(fold, name)]
p = subprocess.Popen(
cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
res = p.communicate()
out, err = res
st = err.decode("ascii", errors="ignore")
if len(st) > 0 and "Traceback" in st:
if "No such file or directory: 'dot'" in st:
# dot not installed, this part
# is tested in onnx framework
pass
elif '"dot" not found in path.' in st:
# dot not installed, this part
# is tested in onnx framework
pass
elif ("Please fix either the inputs or " "the model.") in st:
# onnxruntime datasets changed in master branch,
# still the same in released version on pypi
pass
elif (
"Current official support for domain ai.onnx "
"is till opset 12."
) in st:
# one example is using opset 13 but onnxruntime
# only support up to opset 12.
pass
elif "'str' object has no attribute 'decode'" in st:
# unstable bug in scikit-learn<0.24
pass
else:
raise RuntimeError(
"Example '{}' (cmd: {} - exec_prefix='{}') "
"failed due to\n{}"
"".format(name, cmds, sys.exec_prefix, st)
)
tested += 1
if tested == 0:
raise RuntimeError("No example was tested.")
reason = None

if reason:

@unittest.skip(reason)
def _test_(self, name=name):

res = self.run_test(fold, name, verbose=VERBOSE)
self.assertTrue(res)

else:

def _test_(self, name=name):
res = self.run_test(fold, name, verbose=VERBOSE)
self.assertTrue(res)

short_name = os.path.split(os.path.splitext(name)[0])[-1]
setattr(cls, f"test_{short_name}", _test_)


TestDocumentationExamples.add_test_methods()

if __name__ == "__main__":
unittest.main()
unittest.main(verbosity=2)
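
The rewritten test file above replaces the single looping test with one generated unittest method per example script. A stripped-down sketch of that setattr-based registration pattern (the folder name and helper below are hypothetical, not the project's actual code):

# Illustrative sketch of dynamic test registration; "examples" and run_script are hypothetical.
import os
import unittest


class TestExamples(unittest.TestCase):
    def run_script(self, path: str) -> int:
        # placeholder for the real logic that imports or executes the script
        return 1 if os.path.exists(path) else 0

    @classmethod
    def add_test_methods(cls, folder="examples"):
        if not os.path.isdir(folder):
            return
        for name in sorted(os.listdir(folder)):
            if not (name.startswith("plot_") and name.endswith(".py")):
                continue
            path = os.path.join(folder, name)

            # the default argument freezes path for each generated method
            def _test_(self, path=path):
                self.assertTrue(self.run_script(path))

            setattr(cls, f"test_{os.path.splitext(name)[0]}", _test_)


TestExamples.add_test_methods()

if __name__ == "__main__":
    unittest.main(verbosity=2)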
94 changes: 0 additions & 94 deletions docs/tests/test_documentation_tutorial.py

This file was deleted.

2 changes: 1 addition & 1 deletion docs/tutorial/plot_gbegin_dataframe.py
@@ -46,7 +46,7 @@


categorical_transformer = Pipeline(
[("onehot", OneHotEncoder(sparse=False, handle_unknown="ignore"))]
[("onehot", OneHotEncoder(sparse_output=False, handle_unknown="ignore"))]
)
preprocessor = ColumnTransformer(
transformers=[("cat", categorical_transformer, cat_cols)], remainder="passthrough"
4 changes: 2 additions & 2 deletions tests/test_onnx_helper.py
@@ -76,7 +76,7 @@ def test_onnx_helper_load_save(self):
def test_onnx_helper_load_save_init(self):
model = make_pipeline(
Binarizer(),
OneHotEncoder(sparse=False, handle_unknown="ignore"),
OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
StandardScaler(),
)
X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
@@ -107,7 +107,7 @@ def test_onnx_helper_load_save_init(self):
)
def test_onnx_helper_load_save_init_meta(self):
model = make_pipeline(
Binarizer(), OneHotEncoder(sparse=False), StandardScaler()
Binarizer(), OneHotEncoder(sparse_output=False), StandardScaler()
)
X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
model.fit(X)
2 changes: 1 addition & 1 deletion tests/test_sklearn_array_feature_extractor.py
@@ -34,7 +34,7 @@ def test_array_feature_extractor(self):
num_attributes_clustering = [3, 4] # this is of length 12 in reality
gmm = GaussianMixture(n_components=2, random_state=1)
ohe_cat = [
OneHotEncoder(categories="auto", sparse=False, drop=None)
OneHotEncoder(categories="auto", sparse_output=False, drop=None)
for i in cat_attributes_clustering
]
ct_cat = ColumnTransformer(
6 changes: 3 additions & 3 deletions tests/test_sklearn_one_hot_encoder_converter.py
@@ -110,7 +110,7 @@ def test_model_one_hot_encoder_int32(self):
@ignore_warnings(category=FutureWarning)
def test_model_one_hot_encoder_int32_scaler(self):
model = make_pipeline(
OneHotEncoder(categories="auto", sparse=False), RobustScaler()
OneHotEncoder(categories="auto", sparse_output=False), RobustScaler()
)
data = numpy.array(
[[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int32
@@ -238,7 +238,7 @@ def test_one_hot_encoder_string_drop_first(self):
@ignore_warnings(category=FutureWarning)
def test_model_one_hot_encoder_list_sparse(self):
model = OneHotEncoder(
categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse=True
categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse_output=True
)
data = numpy.array(
[[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64
@@ -265,7 +265,7 @@ def test_model_one_hot_encoder_list_sparse(self):
@ignore_warnings(category=FutureWarning)
def test_model_one_hot_encoder_list_dense(self):
model = OneHotEncoder(
categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse=False
categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse_output=False
)
data = numpy.array(
[[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64
2 changes: 1 addition & 1 deletion tests/test_sklearn_pipeline.py
@@ -264,7 +264,7 @@ def test_pipeline_column_transformer(self):
steps=[
(
"onehot",
OneHotEncoder(sparse=True, handle_unknown="ignore"),
OneHotEncoder(sparse_output=True, handle_unknown="ignore"),
),
(
"tsvd",
4 changes: 3 additions & 1 deletion tests/test_sklearn_pipeline_concat_tfidf.py
@@ -284,7 +284,9 @@ def get_pipeline(N=10000):
dfx_train, dfx_test, dfy_train, dfy_test = train_test_split(dfx, dfy)

cat_features = ["CAT1", "CAT2"]
categorical_transformer = OneHotEncoder(handle_unknown="ignore", sparse=True)
categorical_transformer = OneHotEncoder(
handle_unknown="ignore", sparse_output=True
)
textual_feature = "TEXT"
count_vect_transformer = Pipeline(
steps=[
4 changes: 2 additions & 2 deletions tests/test_sklearn_stacking.py
@@ -186,7 +186,7 @@ def test_model_stacking_classifier_nozipmap_passthrough(self):
@ignore_warnings(category=FutureWarning)
def test_issue_786_exc(self):
pipeline = make_pipeline(
OneHotEncoder(handle_unknown="ignore", sparse=False),
OneHotEncoder(handle_unknown="ignore", sparse_output=False),
StackingClassifier(
estimators=[
("rf", RandomForestClassifier(n_estimators=10, random_state=42)),
@@ -230,7 +230,7 @@ def test_issue_786_exc(self):
@ignore_warnings(category=FutureWarning)
def test_issue_786(self):
pipeline = make_pipeline(
OneHotEncoder(handle_unknown="ignore", sparse=False),
OneHotEncoder(handle_unknown="ignore", sparse_output=False),
StackingClassifier(
estimators=[
("rf", RandomForestClassifier(n_estimators=10, random_state=42)),
2 changes: 1 addition & 1 deletion tests/test_utils_sklearn.py
@@ -149,7 +149,7 @@ def test_pipeline_column_transformer(self):

categorical_transformer = Pipeline(
steps=[
("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore")),
("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore")),
("tsvd", TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4)),
]
)
