Commit
rename sparse into sparse_output
Signed-off-by: Xavier Dupre <[email protected]>
xadupre committed Jan 23, 2024
1 parent cb28075 commit 5edf1cf
Showing 12 changed files with 87 additions and 169 deletions.
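
The rename follows scikit-learn's own API change: the sparse argument of OneHotEncoder was deprecated in favour of sparse_output in scikit-learn 1.2 and removed in 1.4. A minimal sketch of the renamed call, shown here only for context and not part of this commit's diff:

# Illustrative sketch only: the encoder behaves the same, only the keyword changes.
import numpy
from sklearn.preprocessing import OneHotEncoder

X = numpy.array([["a"], ["b"], ["a"]])

# scikit-learn >= 1.2 (the old spelling is removed in 1.4)
enc = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
print(enc.fit_transform(X))

# scikit-learn < 1.2 used the old keyword:
# enc = OneHotEncoder(sparse=False, handle_unknown="ignore")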
2 changes: 1 addition & 1 deletion .azure-pipelines/linux-conda-CI.yml
@@ -40,7 +40,7 @@ jobs:
pandas.version: ''
lgbm.version: ''
onnxcc.version: '>=1.8.1'
run.example: '1'
run.example: '0'

Py311-Onnx141-Rt160-Skl131:
do.bench: '0'
2 changes: 1 addition & 1 deletion docs/pipeline.rst
@@ -41,7 +41,7 @@ useful to build complex pipelines such as the following one:
])

categorical_transformer = Pipeline(steps=[
('onehot', OneHotEncoder(sparse=True, handle_unknown='ignore')),
('onehot', OneHotEncoder(sparse_output=True, handle_unknown='ignore')),
('tsvd', TruncatedSVD(n_components=1, algorithm='arpack', tol=1e-4))
])

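Once the keyword is updated, such a pipeline converts to ONNX as before. A minimal sketch with made-up data (the names and values below are illustrative, not taken from the documentation page):

# Illustrative sketch: convert a small pipeline using sparse_output with skl2onnx.
import numpy
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from skl2onnx import to_onnx

X = numpy.array([[0.0, 1.0], [1.0, 2.0], [2.0, 0.0], [1.0, 1.0]], dtype=numpy.float32)
pipe = Pipeline(steps=[
    ("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore")),
    ("tsvd", TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4)),
])
pipe.fit(X)

# to_onnx infers the input type from the sample array passed in.
onx = to_onnx(pipe, X[:1])
print([i.name for i in onx.graph.input])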
132 changes: 71 additions & 61 deletions docs/tests/test_documentation_examples.py
@@ -1,15 +1,15 @@
# SPDX-License-Identifier: Apache-2.0

"""
Tests examples from the documentation.
"""
import unittest
import packaging.version as pv
import os
import sys
import importlib
import subprocess
import onnxruntime
import time
from skl2onnx import __file__ as onnxrt_backend_dev_file

VERBOSE = 0
ROOT = os.path.realpath(os.path.abspath(os.path.join(onnxrt_backend_dev_file, "..")))


def import_source(module_file_path, module_name):
@@ -24,64 +24,74 @@ def import_source(module_file_path, module_name):
return module_spec.loader.exec_module(module)


class TestDocumentationExample(unittest.TestCase):
def test_documentation_examples(self):
class TestDocumentationExamples(unittest.TestCase):
def run_test(self, fold: str, name: str, verbose=0) -> int:
ppath = os.environ.get("PYTHONPATH", "")
if not ppath:
os.environ["PYTHONPATH"] = ROOT
elif ROOT not in ppath:
sep = ";" if sys.platform == "win32" else ":"
os.environ["PYTHONPATH"] = ppath + sep + ROOT
perf = time.perf_counter()
try:
mod = import_source(fold, os.path.splitext(name)[0])
assert mod is not None
except FileNotFoundError:
# try another way
cmds = [sys.executable, "-u", os.path.join(fold, name)]
p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
res = p.communicate()
out, err = res
st = err.decode("ascii", errors="ignore")
if st and "Traceback" in st:
if '"dot" not found in path.' in st:
# dot not installed, this part
# is tested in onnx framework
if verbose:
print(f"failed: {name!r} due to missing dot.")
return 0
raise AssertionError(
"Example '{}' (cmd: {} - exec_prefix='{}') "
"failed due to\n{}"
"".format(name, cmds, sys.exec_prefix, st)
)
dt = time.perf_counter() - perf
if verbose:
print(f"{dt:.3f}: run {name!r}")
return 1

@classmethod
def add_test_methods(cls):
this = os.path.abspath(os.path.dirname(__file__))
fold = os.path.normpath(os.path.join(this, "..", "examples"))
found = os.listdir(fold)
tested = 0
for name in found:
if name.startswith("plot_") and name.endswith(".py"):
if name == "plot_pipeline_lightgbm.py" and pv.Version(
onnxruntime.__version__
) < pv.Version("1.0.0"):
folds = [
os.path.normpath(os.path.join(this, "..", "docs", "examples")),
os.path.normpath(os.path.join(this, "..", "docs", "tutorial")),
]
for fold in folds:
found = os.listdir(fold)
for name in found:
if not name.endswith(".py") or not name.startswith("plot_"):
continue
print("run %r" % name)
try:
mod = import_source(fold, os.path.splitext(name)[0])
assert mod is not None
except FileNotFoundError:
# try another way
cmds = [sys.executable, "-u", os.path.join(fold, name)]
p = subprocess.Popen(
cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
res = p.communicate()
out, err = res
st = err.decode("ascii", errors="ignore")
if len(st) > 0 and "Traceback" in st:
if "No such file or directory: 'dot'" in st:
# dot not installed, this part
# is tested in onnx framework
pass
elif '"dot" not found in path.' in st:
# dot not installed, this part
# is tested in onnx framework
pass
elif ("Please fix either the inputs or " "the model.") in st:
# onnxruntime datasets changed in master branch,
# still the same in released version on pypi
pass
elif (
"Current official support for domain ai.onnx "
"is till opset 12."
) in st:
# one example is using opset 13 but onnxruntime
# only support up to opset 12.
pass
elif "'str' object has no attribute 'decode'" in st:
# unstable bug in scikit-learn<0.24
pass
else:
raise RuntimeError(
"Example '{}' (cmd: {} - exec_prefix='{}') "
"failed due to\n{}"
"".format(name, cmds, sys.exec_prefix, st)
)
tested += 1
if tested == 0:
raise RuntimeError("No example was tested.")
reason = None

if reason:

@unittest.skip(reason)
def _test_(self, name=name):

res = self.run_test(fold, name, verbose=VERBOSE)
self.assertTrue(res)

else:

def _test_(self, name=name):
res = self.run_test(fold, name, verbose=VERBOSE)
self.assertTrue(res)

short_name = os.path.split(os.path.splitext(name)[0])[-1]
setattr(cls, f"test_{short_name}", _test_)


TestDocumentationExamples.add_test_methods()

if __name__ == "__main__":
unittest.main()
unittest.main(verbosity=2)
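
The rewritten test file above replaces the single looping test with one generated unittest method per example script. A stripped-down sketch of that setattr-based registration pattern (the folder name and helper below are hypothetical, not the project's actual code):

# Illustrative sketch of dynamic test registration; "examples" and run_script are hypothetical.
import os
import unittest


class TestExamples(unittest.TestCase):
    def run_script(self, path: str) -> int:
        # placeholder for the real logic that imports or executes the script
        return 1 if os.path.exists(path) else 0

    @classmethod
    def add_test_methods(cls, folder="examples"):
        if not os.path.isdir(folder):
            return
        for name in sorted(os.listdir(folder)):
            if not (name.startswith("plot_") and name.endswith(".py")):
                continue
            path = os.path.join(folder, name)

            # the default argument freezes path for each generated method
            def _test_(self, path=path):
                self.assertTrue(self.run_script(path))

            setattr(cls, f"test_{os.path.splitext(name)[0]}", _test_)


TestExamples.add_test_methods()

if __name__ == "__main__":
    unittest.main(verbosity=2)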
94 changes: 0 additions & 94 deletions docs/tests/test_documentation_tutorial.py

This file was deleted.

2 changes: 1 addition & 1 deletion docs/tutorial/plot_gbegin_dataframe.py
@@ -46,7 +46,7 @@


categorical_transformer = Pipeline(
[("onehot", OneHotEncoder(sparse=False, handle_unknown="ignore"))]
[("onehot", OneHotEncoder(sparse_output=False, handle_unknown="ignore"))]
)
preprocessor = ColumnTransformer(
transformers=[("cat", categorical_transformer, cat_cols)], remainder="passthrough"
4 changes: 2 additions & 2 deletions tests/test_onnx_helper.py
@@ -76,7 +76,7 @@ def test_onnx_helper_load_save(self):
def test_onnx_helper_load_save_init(self):
model = make_pipeline(
Binarizer(),
OneHotEncoder(sparse=False, handle_unknown="ignore"),
OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
StandardScaler(),
)
X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
@@ -107,7 +107,7 @@ def test_onnx_helper_load_save_init(self):
)
def test_onnx_helper_load_save_init_meta(self):
model = make_pipeline(
Binarizer(), OneHotEncoder(sparse=False), StandardScaler()
Binarizer(), OneHotEncoder(sparse_output=False), StandardScaler()
)
X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
model.fit(X)
2 changes: 1 addition & 1 deletion tests/test_sklearn_array_feature_extractor.py
@@ -34,7 +34,7 @@ def test_array_feature_extractor(self):
num_attributes_clustering = [3, 4] # this is of length 12 in reality
gmm = GaussianMixture(n_components=2, random_state=1)
ohe_cat = [
OneHotEncoder(categories="auto", sparse=False, drop=None)
OneHotEncoder(categories="auto", sparse_output=False, drop=None)
for i in cat_attributes_clustering
]
ct_cat = ColumnTransformer(
6 changes: 3 additions & 3 deletions tests/test_sklearn_one_hot_encoder_converter.py
@@ -110,7 +110,7 @@ def test_model_one_hot_encoder_int32(self):
@ignore_warnings(category=FutureWarning)
def test_model_one_hot_encoder_int32_scaler(self):
model = make_pipeline(
OneHotEncoder(categories="auto", sparse=False), RobustScaler()
OneHotEncoder(categories="auto", sparse_output=False), RobustScaler()
)
data = numpy.array(
[[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int32
@@ -238,7 +238,7 @@ def test_one_hot_encoder_string_drop_first(self):
@ignore_warnings(category=FutureWarning)
def test_model_one_hot_encoder_list_sparse(self):
model = OneHotEncoder(
categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse=True
categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse_output=True
)
data = numpy.array(
[[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64
@@ -265,7 +265,7 @@ def test_model_one_hot_encoder_list_sparse(self):
@ignore_warnings(category=FutureWarning)
def test_model_one_hot_encoder_list_dense(self):
model = OneHotEncoder(
categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse=False
categories=[[0, 1, 4, 5], [1, 2, 3, 5], [0, 3, 4, 6]], sparse_output=False
)
data = numpy.array(
[[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=numpy.int64
2 changes: 1 addition & 1 deletion tests/test_sklearn_pipeline.py
@@ -264,7 +264,7 @@ def test_pipeline_column_transformer(self):
steps=[
(
"onehot",
OneHotEncoder(sparse=True, handle_unknown="ignore"),
OneHotEncoder(sparse_output=True, handle_unknown="ignore"),
),
(
"tsvd",
4 changes: 3 additions & 1 deletion tests/test_sklearn_pipeline_concat_tfidf.py
@@ -284,7 +284,9 @@ def get_pipeline(N=10000):
dfx_train, dfx_test, dfy_train, dfy_test = train_test_split(dfx, dfy)

cat_features = ["CAT1", "CAT2"]
categorical_transformer = OneHotEncoder(handle_unknown="ignore", sparse=True)
categorical_transformer = OneHotEncoder(
handle_unknown="ignore", sparse_output=True
)
textual_feature = "TEXT"
count_vect_transformer = Pipeline(
steps=[
4 changes: 2 additions & 2 deletions tests/test_sklearn_stacking.py
@@ -186,7 +186,7 @@ def test_model_stacking_classifier_nozipmap_passthrough(self):
@ignore_warnings(category=FutureWarning)
def test_issue_786_exc(self):
pipeline = make_pipeline(
OneHotEncoder(handle_unknown="ignore", sparse=False),
OneHotEncoder(handle_unknown="ignore", sparse_output=False),
StackingClassifier(
estimators=[
("rf", RandomForestClassifier(n_estimators=10, random_state=42)),
@@ -230,7 +230,7 @@ def test_issue_786_exc(self):
@ignore_warnings(category=FutureWarning)
def test_issue_786(self):
pipeline = make_pipeline(
OneHotEncoder(handle_unknown="ignore", sparse=False),
OneHotEncoder(handle_unknown="ignore", sparse_output=False),
StackingClassifier(
estimators=[
("rf", RandomForestClassifier(n_estimators=10, random_state=42)),
2 changes: 1 addition & 1 deletion tests/test_utils_sklearn.py
@@ -149,7 +149,7 @@ def test_pipeline_column_transformer(self):

categorical_transformer = Pipeline(
steps=[
("onehot", OneHotEncoder(sparse=True, handle_unknown="ignore")),
("onehot", OneHotEncoder(sparse_output=True, handle_unknown="ignore")),
("tsvd", TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4)),
]
)
