diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml index 25aa1d254..70fac27c0 100644 --- a/.azure-pipelines/linux-conda-CI.yml +++ b/.azure-pipelines/linux-conda-CI.yml @@ -19,7 +19,7 @@ jobs: python.version: '3.11' numpy.version: '>=1.21.1' scipy.version: '>=1.7.0' - onnx.version: '-i https://test.pypi.org/simple/ onnx==1.15.0rc2' + onnx.version: 'onnx==1.15.0' # -i https://test.pypi.org/simple/ onnx==1.15.0rc2' onnx.target_opset: '' onnxrt.version: 'onnxruntime==1.16.1' sklearn.version: '>=1.3.1' @@ -275,24 +275,40 @@ jobs: fi displayName: 'install onnx' + - script: | + pip show onnx + displayName: 'onnx version' + - script: | pip install $(onnxrt.version) displayName: 'install onnxruntime' + - script: | + pip show onnx + displayName: 'onnx version' + - script: | if [ '$(onnxcc.version)' == 'git' ] then - pip install git+https://github.com/microsoft/onnxconverter-common + pip install git+https://github.com/microsoft/onnxconverter-common --no-deps else - pip install onnxconverter-common$(onnxcc.version) + pip install onnxconverter-common$(onnxcc.version) --no-deps fi displayName: 'install onnxconverter-common' + - script: | + pip show onnx + displayName: 'onnx version' + - script: | pip install -r requirements.txt pip install -r requirements-dev.txt displayName: 'install requirements' + - script: | + pip show onnx + displayName: 'onnx version' + - script: | pip install -e . displayName: 'install' diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml index 1fa00bda9..ea5a2c1c5 100644 --- a/.azure-pipelines/win32-conda-CI.yml +++ b/.azure-pipelines/win32-conda-CI.yml @@ -15,7 +15,7 @@ jobs: matrix: Py310-Onnx150-Rt161-Skl131: python.version: '3.11' - onnx.version: '-i https://test.pypi.org/simple/ onnx==1.15.0rc2' + onnx.version: 'onnx==1.15.0' # '-i https://test.pypi.org/simple/ onnx==1.15.0rc2' onnx.target_opset: '' numpy.version: 'numpy>=1.22.3' scipy.version: 'scipy' @@ -181,7 +181,7 @@ jobs: - script: | call activate skl2onnxEnvironment - pip install $(onnxcc.version) + pip install $(onnxcc.version) --no-deps displayName: 'install onnxconverter-common' - script: | diff --git a/skl2onnx/operator_converters/gradient_boosting.py b/skl2onnx/operator_converters/gradient_boosting.py index 42b598609..13a8a3597 100644 --- a/skl2onnx/operator_converters/gradient_boosting.py +++ b/skl2onnx/operator_converters/gradient_boosting.py @@ -39,7 +39,10 @@ def convert_sklearn_gradient_boosting_classifier( transform = "LOGISTIC" if op.n_classes_ == 2 else "SOFTMAX" if op.init == "zero": loss = op._loss if hasattr(op, "_loss") else op.loss_ - base_values = np.zeros(loss.K) + if hasattr(loss, "K"): + base_values = np.zeros(loss.K) + else: + base_values = np.zeros(1) elif op.init is None: if hasattr(op.estimators_[0, 0], "n_features_in_"): # sklearn >= 1.2 @@ -142,7 +145,10 @@ def convert_sklearn_gradient_boosting_regressor( if op.init == "zero": loss = op._loss if hasattr(op, "_loss") else op.loss_ - cst = np.zeros(loss.K) + if hasattr(loss, "K"): + cst = np.zeros(loss.K) + else: + cst = np.zeros(1) elif op.init is None: # constant_ was introduced in scikit-learn 0.21. if hasattr(op.init_, "constant_"): diff --git a/tests/test_sklearn_bagging_converter.py b/tests/test_sklearn_bagging_converter.py index 69fe498cc..fe5df9fb1 100644 --- a/tests/test_sklearn_bagging_converter.py +++ b/tests/test_sklearn_bagging_converter.py @@ -3,6 +3,7 @@ import unittest import packaging.version as pv +import onnx import onnxruntime try: @@ -351,6 +352,10 @@ def test_bagging_regressor_sgd(self): X, model, model_onnx, basename="SklearnBaggingRegressorSGD-Dec4" ) + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="Fixed issue in more recent versions", + ) @ignore_warnings(category=FutureWarning) def test_bagging_regressor_gradient_boosting(self): model, X = fit_regression_model( diff --git a/tests/test_sklearn_count_vectorizer_converter.py b/tests/test_sklearn_count_vectorizer_converter.py index cf21218a4..65bf03868 100644 --- a/tests/test_sklearn_count_vectorizer_converter.py +++ b/tests/test_sklearn_count_vectorizer_converter.py @@ -5,7 +5,9 @@ """ import unittest import sys +import packaging.version as pv import numpy +import onnx from sklearn.feature_extraction.text import CountVectorizer from skl2onnx import convert_sklearn from skl2onnx.common.data_types import StringTensorType @@ -14,6 +16,10 @@ class TestSklearnCountVectorizer(unittest.TestCase): @unittest.skipIf(TARGET_OPSET < 10, reason="not available") + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="ReferenceEvaluator does not support tfidf with strings", + ) def test_model_count_vectorizer11(self): corpus = numpy.array( [ @@ -36,6 +42,10 @@ def test_model_count_vectorizer11(self): corpus, vect, model_onnx, basename="SklearnCountVectorizer11-OneOff-SklCol" ) + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="ReferenceEvaluator does not support tfidf with strings", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer22(self): corpus = numpy.array( @@ -59,6 +69,10 @@ def test_model_count_vectorizer22(self): corpus, vect, model_onnx, basename="SklearnCountVectorizer22-OneOff-SklCol" ) + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="ReferenceEvaluator does not support tfidf with strings", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer12(self): corpus = numpy.array( @@ -82,6 +96,10 @@ def test_model_count_vectorizer12(self): corpus, vect, model_onnx, basename="SklearnCountVectorizer12-OneOff-SklCol" ) + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="ReferenceEvaluator does not support tfidf with strings", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer13(self): corpus = numpy.array( @@ -105,6 +123,10 @@ def test_model_count_vectorizer13(self): corpus, vect, model_onnx, basename="SklearnCountVectorizer13-OneOff-SklCol" ) + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="ReferenceEvaluator does not support tfidf with strings", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer_binary(self): corpus = numpy.array( @@ -131,6 +153,10 @@ def test_model_count_vectorizer_binary(self): basename="SklearnCountVectorizerBinary-OneOff-SklCol", ) + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="ReferenceEvaluator does not support tfidf with strings", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer11_locale(self): corpus = numpy.array( diff --git a/tests/test_sklearn_count_vectorizer_converter_bug.py b/tests/test_sklearn_count_vectorizer_converter_bug.py index 73b4fea1d..aaea67226 100644 --- a/tests/test_sklearn_count_vectorizer_converter_bug.py +++ b/tests/test_sklearn_count_vectorizer_converter_bug.py @@ -4,7 +4,9 @@ Tests scikit-learn's count vectorizer converter. """ import unittest +import packaging.version as pv import numpy +import onnx from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer from skl2onnx import convert_sklearn from skl2onnx.common.data_types import StringTensorType @@ -12,6 +14,10 @@ class TestSklearnCountVectorizerBug(unittest.TestCase): + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="ReferenceEvaluator does not support tfidf with strings", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer_custom_tokenizer(self): corpus = numpy.array( @@ -47,6 +53,10 @@ def test_model_count_vectorizer_custom_tokenizer(self): basename="SklearnTfidfVectorizer11CustomTokenizer-OneOff-SklCol", ) + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="ReferenceEvaluator does not support tfidf with strings", + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_count_vectorizer_wrong_ngram(self): corpus = numpy.array( diff --git a/tests/test_sklearn_documentation.py b/tests/test_sklearn_documentation.py index ce9e5922b..00acc45f7 100644 --- a/tests/test_sklearn_documentation.py +++ b/tests/test_sklearn_documentation.py @@ -7,7 +7,9 @@ import unittest import urllib.error import warnings +import packaging.version as pv import numpy as np +import onnx from sklearn.base import BaseEstimator, TransformerMixin from sklearn.datasets import fetch_20newsgroups @@ -73,6 +75,10 @@ class TestSklearnDocumentation(unittest.TestCase): @unittest.skipIf( TARGET_OPSET < 10, reason="Encoding issue fixed in a later version" ) + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="ReferenceEvaluator does not support tfidf with strings", + ) def test_pipeline_tfidf(self): categories = ["alt.atheism", "talk.religion.misc"] try: @@ -112,6 +118,10 @@ def test_pipeline_tfidf(self): @unittest.skipIf( TARGET_OPSET < 10, reason="Encoding issue fixed in a later version" ) + @unittest.skipIf( + pv.Version(onnx.__version__) < pv.Version("1.16.0"), + reason="ReferenceEvaluator does not support tfidf with strings", + ) def test_pipeline_tfidf_pipeline_minmax(self): categories = ["alt.atheism", "talk.religion.misc"] try: diff --git a/tests/test_sklearn_glm_regressor_converter.py b/tests/test_sklearn_glm_regressor_converter.py index 063fa43fd..61259a8ff 100644 --- a/tests/test_sklearn_glm_regressor_converter.py +++ b/tests/test_sklearn_glm_regressor_converter.py @@ -4,6 +4,7 @@ import unittest import packaging.version as pv +import onnx import numpy from numpy.testing import assert_almost_equal @@ -54,6 +55,12 @@ ort_version = ort_version.split("+")[0] skl_version = ".".join(sklearn_version.split(".")[:2]) +BACKEND = ( + "onnxruntime" + if pv.Version(onnx.__version__) < pv.Version("1.16.0") + else "onnx;onnxruntime" +) + class TestGLMRegressorConverter(unittest.TestCase): @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) @@ -778,6 +785,7 @@ def test_model_ransac_regressor_tree(self): model_onnx, verbose=False, basename="SklearnRANSACRegressorTree-Dec3", + backend=BACKEND, ) @ignore_warnings(category=(FutureWarning, ConvergenceWarning)) diff --git a/tests/test_sklearn_gradient_boosting_converters.py b/tests/test_sklearn_gradient_boosting_converters.py index 4ff10f1b6..45197ba6c 100644 --- a/tests/test_sklearn_gradient_boosting_converters.py +++ b/tests/test_sklearn_gradient_boosting_converters.py @@ -1,9 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 - import packaging.version as pv import unittest import numpy as np +import onnx from pandas import DataFrame from sklearn import __version__ as skl_version from sklearn.datasets import make_classification @@ -30,6 +30,12 @@ ort_version = ort_version.split("+")[0] skl_version = skl_version.split("+")[0] +BACKEND = ( + "onnxruntime" + if pv.Version(onnx.__version__) < pv.Version("1.16.0") + else "onnx;onnxruntime" +) + class TestSklearnGradientBoostingModels(unittest.TestCase): @unittest.skipIf( @@ -215,7 +221,11 @@ def test_gradient_boosting_regressor_ls_loss(self): ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, basename="SklearnGradientBoostingRegressionLsLoss" + X, + model, + model_onnx, + basename="SklearnGradientBoostingRegressionLsLoss", + backend=BACKEND, ) @unittest.skipIf( @@ -233,7 +243,11 @@ def test_gradient_boosting_regressor_lad_loss(self): ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, basename="SklearnGradientBoostingRegressionLadLoss" + X, + model, + model_onnx, + basename="SklearnGradientBoostingRegressionLadLoss", + backend=BACKEND, ) def test_gradient_boosting_regressor_huber_loss(self): @@ -248,7 +262,11 @@ def test_gradient_boosting_regressor_huber_loss(self): ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, basename="SklearnGradientBoostingRegressionHuberLoss" + X, + model, + model_onnx, + basename="SklearnGradientBoostingRegressionHuberLoss", + backend=BACKEND, ) def test_gradient_boosting_regressor_quantile_loss(self): @@ -267,6 +285,7 @@ def test_gradient_boosting_regressor_quantile_loss(self): model, model_onnx, basename="SklearnGradientBoostingRegressionQuantileLoss-Dec4", + backend=BACKEND, ) def test_gradient_boosting_regressor_int(self): @@ -300,6 +319,7 @@ def test_gradient_boosting_regressor_zero_init(self): model, model_onnx, basename="SklearnGradientBoostingRegressionZeroInit-Dec4", + backend=BACKEND, ) @unittest.skipIf( diff --git a/tests/test_sklearn_grid_search_cv_converter.py b/tests/test_sklearn_grid_search_cv_converter.py index 1e29191d8..3144d57fc 100644 --- a/tests/test_sklearn_grid_search_cv_converter.py +++ b/tests/test_sklearn_grid_search_cv_converter.py @@ -61,10 +61,15 @@ def test_grid_search_multiclass_float(self): "GridSearchCV", [("input", FloatTensorType([None, X.shape[1]]))], target_opset=TARGET_OPSET, + options={"zipmap": False}, ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, basename="SklearnGridSearchMulticlassFloat" + X, + model, + model_onnx, + basename="SklearnGridSearchMulticlassFloat", + backend="onnxruntime", ) def test_grid_search_binary_int(self): diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py index 76d6b04d7..1160983aa 100644 --- a/tests/test_sklearn_nearest_neighbour_converter.py +++ b/tests/test_sklearn_nearest_neighbour_converter.py @@ -145,7 +145,7 @@ def test_model_knn_regressor(self): @unittest.skipIf(dont_test_radius(), reason="not available") @unittest.skipIf( - pv.Version(ort_version) < pv.Version("1.8.0"), reason="produces nan values" + pv.Version(ort_version) < pv.Version("1.15.0"), reason="produces nan values" ) @ignore_warnings(category=DeprecationWarning) def test_model_knn_regressor_radius(self): diff --git a/tests/test_sklearn_one_vs_rest_classifier_converter.py b/tests/test_sklearn_one_vs_rest_classifier_converter.py index 10c6bafbd..ba0ba871a 100644 --- a/tests/test_sklearn_one_vs_rest_classifier_converter.py +++ b/tests/test_sklearn_one_vs_rest_classifier_converter.py @@ -409,7 +409,11 @@ def test_ovr_classification_float_binary_ensemble(self): ) self.assertIsNotNone(model_onnx) dump_data_and_model( - X, model, model_onnx, basename="SklearnOVRClassificationFloatBinEnsemble" + X, + model, + model_onnx, + basename="SklearnOVRClassificationFloatBinEnsemble", + backend="onnxruntime", ) @ignore_warnings(category=warnings_to_skip) diff --git a/tests/test_sklearn_pipeline_within_pipeline.py b/tests/test_sklearn_pipeline_within_pipeline.py index e45de2305..77e833d8f 100644 --- a/tests/test_sklearn_pipeline_within_pipeline.py +++ b/tests/test_sklearn_pipeline_within_pipeline.py @@ -6,7 +6,9 @@ from textwrap import dedent import unittest from io import StringIO +import packaging.version as pv import numpy as np +import onnx import pandas try: @@ -33,6 +35,12 @@ from skl2onnx.common.data_types import FloatTensorType, StringTensorType from test_utils import dump_data_and_model, TARGET_OPSET +BACKEND = ( + "onnxruntime" + if pv.Version(onnx.__version__) < pv.Version("1.16.0") + else "onnx;onnxruntime" +) + class TestSklearnPipelineWithinPipeline(unittest.TestCase): def test_pipeline_pca_pipeline_minmax(self): @@ -362,7 +370,11 @@ def test_complex_pipeline(self): preprocessor, initial_types=initial_type, target_opset=TARGET_OPSET ) dump_data_and_model( - X_train, preprocessor, onx, basename="SklearnPipelineComplex" + X_train, + preprocessor, + onx, + basename="SklearnPipelineComplex", + backend=BACKEND, ) diff --git a/tests/test_sklearn_random_forest_converters.py b/tests/test_sklearn_random_forest_converters.py index c4b2f9db2..d647dd166 100644 --- a/tests/test_sklearn_random_forest_converters.py +++ b/tests/test_sklearn_random_forest_converters.py @@ -5,6 +5,7 @@ import packaging.version as pv import numpy from numpy.testing import assert_almost_equal +import onnx from onnxruntime import InferenceSession, __version__ as ort_version import sklearn from sklearn.datasets import load_iris, make_regression, make_classification @@ -66,6 +67,12 @@ def _sklearn_version(): ort_version = ".".join(ort_version.split(".")[:2]) +BACKEND = ( + "onnxruntime" + if pv.Version(onnx.__version__) < pv.Version("1.16.0") + else "onnx;onnxruntime" +) + class TestSklearnTreeEnsembleModels(unittest.TestCase): def test_random_forest_classifier(self): @@ -367,6 +374,7 @@ def common_test_model_hgb_regressor(self, add_nan=False): model_onnx, basename=f"SklearnHGBRegressor{add_nan}", verbose=False, + backend=BACKEND, ) @unittest.skipIf( diff --git a/tests/test_sklearn_scaler_converter.py b/tests/test_sklearn_scaler_converter.py index cde0231a5..94128bac0 100644 --- a/tests/test_sklearn_scaler_converter.py +++ b/tests/test_sklearn_scaler_converter.py @@ -50,6 +50,7 @@ def test_standard_scaler_int(self): model, model_onnx, basename="SklearnStandardScalerInt64", + backend="onnxruntime", ) @ignore_warnings(category=DeprecationWarning) diff --git a/tests/test_sklearn_text.py b/tests/test_sklearn_text.py index ae8f33ea0..ebd966509 100644 --- a/tests/test_sklearn_text.py +++ b/tests/test_sklearn_text.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- -""" -@brief test log(time=10s) -""" + import unittest +import packaging.version as pv import numpy from numpy.testing import assert_almost_equal +import onnx from sklearn import __version__ as skl_version, __file__ as skl_file from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer from skl2onnx import to_onnx @@ -13,6 +13,12 @@ from skl2onnx.common.data_types import StringTensorType from test_utils import dump_data_and_model, TARGET_OPSET +BACKEND = ( + "onnxruntime" + if pv.Version(onnx.__version__) < pv.Version("1.16.0") + else "onnx;onnxruntime" +) + class TestSklearnText(unittest.TestCase): def test_count_vectorizer(self): @@ -297,6 +303,7 @@ def test_model_tfidf_vectorizer_issue(self): vect, model_onnx, basename="SklearnTfidfVectorizerIssue-OneOff-SklCol", + backend=BACKEND, ) diff --git a/tests/test_sklearn_tfidf_transformer_converter_sparse.py b/tests/test_sklearn_tfidf_transformer_converter_sparse.py index cb3353a7b..19c2e1831 100644 --- a/tests/test_sklearn_tfidf_transformer_converter_sparse.py +++ b/tests/test_sklearn_tfidf_transformer_converter_sparse.py @@ -6,6 +6,7 @@ """ import packaging.version as pv import unittest +import onnx from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer from sklearn.pipeline import Pipeline @@ -14,6 +15,12 @@ from skl2onnx import convert_sklearn from test_utils import dump_data_and_model, TARGET_OPSET +BACKEND = ( + "onnxruntime" + if pv.Version(onnx.__version__) < pv.Version("1.16.0") + else "onnx;onnxruntime" +) + class TestSklearnTfidfVectorizerSparse(unittest.TestCase): @unittest.skipIf( @@ -51,6 +58,7 @@ def test_model_tfidf_transform_bug(self): text_clf, model_onnx, basename="SklearnPipelineTfidfTransformer", + backend=BACKEND, ) diff --git a/tests/test_sklearn_tfidf_vectorizer_converter.py b/tests/test_sklearn_tfidf_vectorizer_converter.py index f72f9080f..117286488 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter.py @@ -9,6 +9,7 @@ import packaging.version as pv import numpy from numpy.testing import assert_almost_equal +import onnx from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.svm import SVC @@ -34,6 +35,12 @@ ort_version = ".".join(ort_version.split(".")[:2]) +BACKEND = ( + "onnxruntime" + if pv.Version(onnx.__version__) < pv.Version("1.16.0") + else "onnx;onnxruntime" +) + class TestSklearnTfidfVectorizer(unittest.TestCase): def get_options(self): @@ -63,7 +70,11 @@ def test_model_tfidf_vectorizer11(self): ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, basename="SklearnTfidfVectorizer11-OneOff-SklCol" + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11-OneOff-SklCol", + backend=BACKEND, ) sess = InferenceSession( @@ -100,6 +111,7 @@ def test_model_tfidf_vectorizer11_nolowercase(self): vect, model_onnx, basename="SklearnTfidfVectorizer11NoL-OneOff-SklCol", + backend=BACKEND, ) sess = InferenceSession( @@ -170,6 +182,7 @@ def test_model_tfidf_vectorizer11_empty_string_case1(self): vect, model_onnx, basename="SklearnTfidfVectorizer11EmptyStringSepCase1-" "OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -198,6 +211,7 @@ def test_model_tfidf_vectorizer11_empty_string_case2(self): vect, model_onnx, basename="SklearnTfidfVectorizer11EmptyString-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -233,6 +247,7 @@ def test_model_tfidf_vectorizer11_out_vocabulary(self): vect, model_onnx, basename="SklearnTfidfVectorizer11OutVocab-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -256,7 +271,11 @@ def test_model_tfidf_vectorizer22(self): ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, basename="SklearnTfidfVectorizer22-OneOff-SklCol" + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer22-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -273,7 +292,11 @@ def test_model_tfidf_vectorizer21(self): ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, basename="SklearnTfidfVectorizer22S-OneOff-SklCol" + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer22S-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -297,7 +320,11 @@ def test_model_tfidf_vectorizer12(self): ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, basename="SklearnTfidfVectorizer22-OneOff-SklCol" + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer22-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -324,6 +351,7 @@ def test_model_tfidf_vectorizer12_normL1(self): vect, model_onnx, basename="SklearnTfidfVectorizer22L1-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -351,6 +379,7 @@ def test_model_tfidf_vectorizer12_normL2(self): vect, model_onnx, basename="SklearnTfidfVectorizer22L2-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -374,7 +403,11 @@ def test_model_tfidf_vectorizer13(self): ) self.assertTrue(model_onnx is not None) dump_data_and_model( - corpus, vect, model_onnx, basename="SklearnTfidfVectorizer13-OneOff-SklCol" + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer13-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -409,6 +442,7 @@ def test_model_tfidf_vectorizer11parenthesis_class(self): vect, model_onnx, basename="SklearnTfidfVectorizer11ParenthesisClass-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -454,6 +488,7 @@ def test_model_tfidf_vectorizer11_idparenthesis_id(self): vect, model_onnx, basename="SklearnTfidfVectorizer11ParenthesisId-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -481,6 +516,7 @@ def test_model_tfidf_vectorizer_binary(self): vect, model_onnx, basename="SklearnTfidfVectorizerBinary-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -511,6 +547,7 @@ def test_model_tfidf_vectorizer11_64(self): vect, model_onnx, basename="SklearnTfidfVectorizer1164-OneOff-SklCol", + backend=BACKEND, ) sess = InferenceSession( @@ -614,6 +651,7 @@ def test_model_tfidf_vectorizer11_custom_vocabulary(self): vect, model_onnx, basename="SklearnTfidfVectorizer11CustomVocab-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -650,6 +688,7 @@ def test_model_tfidf_vectorizer_locale(self): vect, model_onnx, basename="SklearnTfidfVectorizer11Locale-OneOff-SklCol", + backend=BACKEND, ) sess = InferenceSession( diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_char.py b/tests/test_sklearn_tfidf_vectorizer_converter_char.py index f0096b18e..b5abf06b6 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_char.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_char.py @@ -4,13 +4,22 @@ Tests scikit-learn's tfidf converter. """ import unittest +import packaging.version as pv import numpy +import onnx from sklearn.feature_extraction.text import TfidfVectorizer from skl2onnx import convert_sklearn from skl2onnx.common.data_types import StringTensorType from test_utils import dump_data_and_model, TARGET_OPSET +BACKEND = ( + "onnxruntime" + if pv.Version(onnx.__version__) < pv.Version("1.16.0") + else "onnx;onnxruntime" +) + + class TestSklearnTfidfVectorizerRegex(unittest.TestCase): @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_short_word(self): @@ -37,6 +46,7 @@ def test_model_tfidf_vectorizer11_short_word(self): vect, model_onnx, basename="SklearnTfidfVectorizer11CharW2-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -84,6 +94,7 @@ def test_model_tfidf_vectorizer11_char(self): vect, model_onnx, basename="SklearnTfidfVectorizer11Char-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -110,6 +121,7 @@ def test_model_tfidf_vectorizer11_char_doublespace(self): vect, model_onnx, basename="SklearnTfidfVectorizer11CharSpace-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -135,6 +147,7 @@ def test_model_tfidf_vectorizer12_char(self): vect, model_onnx, basename="SklearnTfidfVectorizer12Char-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -161,6 +174,7 @@ def test_model_tfidf_vectorizer12_normL1_char(self): vect, model_onnx, basename="SklearnTfidfVectorizer12L1Char-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -212,6 +226,7 @@ def test_model_tfidf_vectorizer11_short_word_spaces(self): vect, model_onnx, basename="SklearnTfidfVectorizer11CharW2-OneOff-SklCol", + backend=BACKEND, ) diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py index 9ed7306ae..779160251 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py @@ -4,7 +4,9 @@ Tests scikit-learn's tfidf converter using downloaded data. """ import unittest +import packaging.version as pv import numpy as np +import onnx from sklearn.model_selection import train_test_split from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.datasets import fetch_20newsgroups @@ -12,6 +14,12 @@ from skl2onnx.common.data_types import StringTensorType from test_utils import dump_data_and_model, TARGET_OPSET +BACKEND = ( + "onnxruntime" + if pv.Version(onnx.__version__) < pv.Version("1.16.0") + else "onnx;onnxruntime" +) + class TestSklearnTfidfVectorizerDataSet(unittest.TestCase): @unittest.skipIf(TARGET_OPSET < 9, reason="not available") @@ -30,7 +38,11 @@ def test_tfidf_20newsgroups(self): target_opset=TARGET_OPSET, ) dump_data_and_model( - X_test, model, onnx_model, basename="SklearnTfidfVectorizer20newsgroups" + X_test, + model, + onnx_model, + basename="SklearnTfidfVectorizer20newsgroups", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 9, reason="not available") @@ -53,6 +65,7 @@ def test_tfidf_20newsgroups_nolowercase(self): model, onnx_model, basename="SklearnTfidfVectorizer20newsgroupsNOLower", + backend=BACKEND, ) diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_regex.py b/tests/test_sklearn_tfidf_vectorizer_converter_regex.py index df94705f4..184fd7ceb 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_regex.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_regex.py @@ -4,13 +4,22 @@ Tests scikit-learn's tfidf converter. """ import unittest +import packaging.version as pv import numpy +import onnx from sklearn.feature_extraction.text import TfidfVectorizer from skl2onnx import convert_sklearn from skl2onnx.common.data_types import StringTensorType from test_utils import dump_data_and_model, TARGET_OPSET +BACKEND = ( + "onnxruntime" + if pv.Version(onnx.__version__) < pv.Version("1.16.0") + else "onnx;onnxruntime" +) + + class TestSklearnTfidfVectorizerRegex(unittest.TestCase): def get_options(self): return {TfidfVectorizer: {"tokenexp": ""}} @@ -40,6 +49,7 @@ def test_model_tfidf_vectorizer11(self): vect, model_onnx, basename="SklearnTfidfVectorizer11Regex-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -70,7 +80,13 @@ def test_model_tfidf_vectorizer11_opset(self): self.assertTrue(model_onnx is not None) if opset >= 10: name = "SklearnTfidfVectorizer11Rx%d-OneOff-SklCol" % opset - dump_data_and_model(corpus, vect, model_onnx, basename=name) + dump_data_and_model( + corpus, + vect, + model_onnx, + basename=name, + backend=BACKEND, + ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") def test_model_tfidf_vectorizer11_word4(self): @@ -99,6 +115,7 @@ def test_model_tfidf_vectorizer11_word4(self): vect, model_onnx, basename="SklearnTfidfVectorizer11Regex4-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -127,6 +144,7 @@ def test_model_tfidf_vectorizer11_empty_string(self): vect, model_onnx, basename="SklearnTfidfVectorizer11EmptyStringRegex-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -162,6 +180,7 @@ def test_model_tfidf_vectorizer11_out_vocabulary(self): vect, model_onnx, basename="SklearnTfidfVectorizer11OutVocabRegex-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -189,6 +208,7 @@ def test_model_tfidf_vectorizer22(self): vect, model_onnx, basename="SklearnTfidfVectorizer22Regex-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -214,6 +234,7 @@ def test_model_tfidf_vectorizer12(self): vect, model_onnx, basename="SklearnTfidfVectorizer12SRegex-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -241,6 +262,7 @@ def test_model_tfidf_vectorizer122(self): vect, model_onnx, basename="SklearnTfidfVectorizer12Regex-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -267,6 +289,7 @@ def test_model_tfidf_vectorizer12_normL1(self): vect, model_onnx, basename="SklearnTfidfVectorizer12L1Regex-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -294,6 +317,7 @@ def test_model_tfidf_vectorizer12_normL2(self): vect, model_onnx, basename="SklearnTfidfVectorizer12L2Regex-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -321,6 +345,7 @@ def test_model_tfidf_vectorizer13(self): vect, model_onnx, basename="SklearnTfidfVectorizer13Regex-OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -356,6 +381,7 @@ def test_model_tfidf_vectorizer11parenthesis_class(self): vect, model_onnx, basename="SklearnTfidfVectorizer11ParenthesisClassRegex-" "OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -409,6 +435,7 @@ def test_model_tfidf_vectorizer11_idparenthesis_id(self): vect, model_onnx, basename="SklearnTfidfVectorizer11ParenthesisIdRegex-" "OneOff-SklCol", + backend=BACKEND, ) @unittest.skipIf(TARGET_OPSET < 10, reason="not available") @@ -457,9 +484,9 @@ def test_model_tfidf_vectorizer_issue(self): vect, model_onnx, basename="SklearnTfidfVectorizerIssue-OneOff-SklCol", + backend=BACKEND, ) if __name__ == "__main__": - TestSklearnTfidfVectorizerRegex().test_model_tfidf_vectorizer_issue() - unittest.main() + unittest.main(verbosity=2) diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py index 6bc2a07e9..bb9385e7e 100644 --- a/tests/test_utils/__init__.py +++ b/tests/test_utils/__init__.py @@ -54,10 +54,9 @@ def InferenceSessionEx(onx, *args, verbose=0, **kwargs): def create_tensor(N, C, H=None, W=None): if H is None and W is None: return np.random.rand(N, C).astype(np.float32, copy=False) - elif H is not None and W is not None: + if H is not None and W is not None: return np.random.rand(N, C, H, W).astype(np.float32, copy=False) - else: - raise ValueError("This function only produce 2-D or 4-D tensor.") + raise ValueError("This function only produce 2-D or 4-D tensor.") def _get_ir_version(opv): diff --git a/tests_onnxmltools/test_xgboost_converters.py b/tests_onnxmltools/test_xgboost_converters.py index 404796a52..9e48209c6 100644 --- a/tests_onnxmltools/test_xgboost_converters.py +++ b/tests_onnxmltools/test_xgboost_converters.py @@ -74,7 +74,7 @@ def custom_parser(scope, model, inputs, custom_parsers=None): ) @unittest.skipIf( - pv.Version(onnxmltools.__version__) < pv.Version("1.11"), + pv.Version(onnxmltools.__version__) < pv.Version("1.12"), reason="converter for xgboost is too old", ) def test_xgb_regressor(self):