# deselected_tests.yaml

# This file lists node ids (in pytest sense) of sklearn tests that
# are to be deselected during test discovery step.
#
# Deselection can be predicated on the version of scikit-learn used.
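# Write an entry as "- node_id cond" or "- node_id cond1,cond2", where each
# cond has the form OPver (operator immediately followed by the version).
# Supported OPs are >=, <=, ==, !=, >, <
# For example,
#    - tests/test_isotonic.py::test_permutation_invariance >0.18,<=0.19
#  deselects the test only for versions after 0.18 up to and including 0.19
#  (such as 0.18.1, 0.18.2 and 0.19); in every other version the test still runs.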

# Pytest node ids of scikit-learn tests to deselect. An entry is either a bare
# node id, or a node id followed by one space and a comma-separated list of
# OPver version conditions (no whitespace between the operator and the version).
deselected_tests:

  # Deselecting 5 SVC related tests where duplicate samples end up being support vectors
  # See: https://github.com/scikit-learn/scikit-learn/issues/12738
  - svm/tests/test_svm.py::test_svc_clone_with_callable_kernel
  - svm/tests/test_svm.py::test_precomputed
  - svm/tests/test_sparse.py::test_svc
  - svm/tests/test_sparse.py::test_sparse_realdata
  - svm/tests/test_sparse.py::test_svc_iris

  # The test compares the result of a 1-in-1 decision function of the SVC algorithm between dense and sparse input.
  # oneDAL does not guarantee such equality given the different order of operations. Max absolute difference: 2.87212502e-05
  - ensemble/tests/test_bagging.py::test_sparse_classification

  # Bitwise comparison of probabilities using a print.
  # Note: the entry names the whole test module, not a single test.
  - metrics/tests/test_classification.py

  # Max absolute difference: 0.04 for rocauc, and 0.01 for precision_recall
  - metrics/tests/test_ranking.py::test_roc_curve_hard
  - metrics/tests/test_ranking.py::test_precision_recall_curve

  # NOTE(review): no reason is recorded for this deselection.
  - model_selection/tests/test_search.py::test_search_cv_results_rank_tie_breaking

  # test_k_means_fit_predict is known to sporadically fail for float32 inputs in multithreaded runs
  # See: https://github.com/IntelPython/daal4py/issues/25
  - cluster/tests/test_k_means.py::test_k_means_fit_predict

  # test_nonuniform_strategies fails due to differences in handling of vacuous clusters after update
  # See https://github.com/IntelPython/daal4py/issues/69
  - preprocessing/tests/test_discretization.py::test_nonuniform_strategies >=0.20.3
  - cluster/tests/test_k_means.py::test_relocated_clusters
  - cluster/tests/test_k_means.py::test_kmeans_relocated_clusters

  # oneDAL does not check convergence for tol == 0.0 for ease of benchmarking
  - cluster/tests/test_k_means.py::test_kmeans_convergence
  - cluster/tests/test_k_means.py::test_kmeans_verbose

  # For Newton-CG solver, solution computed in float32 disagrees with that of float64 a little more than
  # the test expects, see https://github.com/scikit-learn/scikit-learn/pull/13645
  - linear_model/tests/test_logistic.py::test_dtype_match

  # _hist_gradient_boosting tests (added in 0.21.0) time out in CircleCI
  - ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py

  # https://github.com/scikit-learn/scikit-learn/pull/15857
  # Note: the same test is also listed without a version condition further below.
  - ensemble/tests/test_stacking.py::test_stacking_cv_influence >=0.22

  # This fails on certain platforms. Weighted data do not go through DAAL,
  # unweighted do. Since convergence is not accomplished (a comment in the test
  # suggests that), coefficients are slightly different, resulting in 1 prediction
  # disagreement.
  - ensemble/tests/test_stacking.py::test_stacking_with_sample_weight >=0.22.1

  # NOTE(review): no reason is recorded for this deselection.
  - tests/test_pipeline.py::test_pipeline_memory

  # docstrings do not document daal_model_ attribute set by fit for
  # LinearRegression, Ridge and SVC
  - tests/test_docstring_parameters.py::test_fit_docstring_attributes

  # Insufficient accuracy of "coefs" and "intercept" in Elastic Net for multi-target problem
  # https://github.com/oneapi-src/oneDAL/issues/494
  - linear_model/tests/test_coordinate_descent.py::test_enet_multitarget >=0.21

  # Insufficient accuracy of objective function in Elastic Net in case of warm_start
  # https://github.com/oneapi-src/oneDAL/issues/495
  - linear_model/tests/test_coordinate_descent.py::test_warm_start_convergence_with_regularizer_decrement >=0.21

  # oneDAL doesn't support sample_weight (falls back to Sklearn), insufficient accuracy (similar to previous cases)
  - linear_model/tests/test_coordinate_descent.py::test_enet_sample_weight_consistency >=0.23

  # There is a difference (Max absolute difference: 1.97215226e-31) when computing log_proba with np.log(y_proba).
  # It looks like IDP NumPy is the cause of the failure, due to more aggressive compiler optimization
  # when compiling NumPy UFunc loops.
  - neural_network/tests/test_mlp.py::test_predict_proba_multilabel

  # On small datasets, the regression coefficients for multi-target problem differ from scikit-learn.
  # Coefficients match for the first label only. For big data the coefficients are close.
  # See: https://github.com/IntelPython/daal4py/issues/275
  - linear_model/tests/test_ridge.py::test_ridge_cv_individual_penalties

  # Different number of iterations because of instability of kmeans
  # https://github.com/IntelPython/daal4py/issues/277
  - cluster/tests/test_k_means.py::test_kmeans_elkan_results

  # Insufficient prediction accuracy for decision forest regression.
  # The test_boston case is renamed to test_regression from version 0.24 on, so both names are listed.
  # This problem will be fixed
  - ensemble/tests/test_forest.py::test_memory_layout
  - ensemble/tests/test_forest.py::test_regression
  - ensemble/tests/test_forest.py::test_boston

  # Different interpretation of trees compared to scikit-learn
  # Looks like we need to align tree traversal. This problem will be fixed
  - ensemble/tests/test_forest.py::test_min_samples_leaf

  # Out of bag score worse than scikit-learn
  # This problem will be fixed
  - ensemble/tests/test_forest.py::test_warm_start_oob
  - ensemble/tests/test_forest.py::test_oob_score_regressors

  # Our threads are used internally and are not explicitly specified
  - ensemble/tests/test_forest.py::test_backend_respected

  # We do not support accessing trees through the result variable
  - ensemble/tests/test_forest.py::test_warm_start
  - inspection/tests/test_partial_dependence.py::test_recursion_decision_tree_vs_forest_and_gbdt

  # Our implementation builds different trees compared to scikit-learn,
  # so tests that compare trees/forests fail
  - ensemble/tests/test_forest.py::test_warm_start_clear
  - ensemble/tests/test_forest.py::test_class_weights
  - ensemble/tests/test_stacking.py::test_stacking_cv_influence
  - inspection/tests/test_permutation_importance.py::test_robustness_to_high_cardinality_noisy_feature
  - tests/test_common.py::test_estimators
  - tests/test_multioutput.py::test_multi_output_classification
  - utils/tests/test_estimator_checks.py::test_check_estimator_clones

  # Sometimes fails due to non-deterministic results in RF;
  # will be fixed in the next release
  - tests/test_multioutput.py::test_classifier_chain_tuple_order

  # Different behavior when 1 class enters the input
  - feature_selection/tests/test_rfe.py::test_rfe_cv_groups

  # The bugs are fixed. Remove these tests from deselected after the gold release
  - ensemble/tests/test_bagging.py::test_classification
  - neighbors/tests/test_neighbors.py::test_kneighbors_classifier
  - neighbors/tests/test_neighbors.py::test_KNeighborsClassifier_multioutput
  - semi_supervised/tests/test_label_propagation.py::test_predict_sparse_callable_kernel

  # daal4py throws the exception from the library. Will be fixed.
  - tests/test_common.py::test_check_n_features_in_after_fitting

  # module name should start with 'sklearn.metrics' but we have 'daal4py.sklearn.metrics'
  - metrics/tests/test_score_objects.py::test_scoring_is_not_metric

  # Stability issue with max absolute difference: 4.33846826e-08/1.17613697e-11. Remove in next release
  - inspection/tests/test_partial_dependence.py::test_partial_dependence_dataframe[features-integer-None-estimator-brute]
  - inspection/tests/test_partial_dependence.py::test_partial_dependence_dataframe[features-string-None-estimator-brute]
  - ensemble/tests/test_bagging.py::test_estimators_samples_deterministic

  # Data for the tests is generated by using SVC, and it is not equal to stock sklearn.
  # The condition is written as OPver with no space after the operator, matching the
  # other conditional entries. Both tests are also listed unconditionally above.
  - metrics/tests/test_ranking.py::test_roc_curve_hard <0.22
  - metrics/tests/test_ranking.py::test_precision_recall_curve <0.22

  # Some values in PCA.components_ (in the last component) aren't equal (0.6 on average for absolute error in this test)
  # because of different implementations of PCA. Also, results are not stable.
  - decomposition/tests/test_incremental_pca.py::test_whitening

  # The test fails because the 'auto' strategy in PCA was changed to improve performance.
  # 'randomized' PCA expected, but 'full' is given.
  - decomposition/tests/test_pca.py::test_pca_svd_solver_auto[data3-10-randomized]

  # Stability issue with max absolute difference: 0.00015992. Remove in the next release.
  - decomposition/tests/test_pca.py::test_pca_dtype_preservation
  - decomposition/tests/test_pca.py::test_pca_n_components_mostly_explained_variance_ratio

  # Scikit-learn logistic regression predict depends on decision_function while d4p's does not.
  # Assertion error in check_estimator(PoorScoreLogisticRegression())
  - utils/tests/test_estimator_checks.py::test_check_estimator
  # --------------------------------------------------------
  # Tests listed under reduced_tests do not need to be run for daal4py patching.
  # NOTE(review): presumably these paths are skipped in a reduced test run;
  # confirm against the script that reads this key.
reduced_tests:
  # Individual test modules, grouped by scikit-learn sub-package.
  - cluster/tests/test_affinity_propagation.py
  - cluster/tests/test_bicluster.py
  - cluster/tests/test_birch.py
  - cluster/tests/test_mean_shift.py
  - cluster/tests/test_optics.py

  - compose/tests/test_column_transformer.py

  - decomposition/tests/test_dict_learning.py
  - decomposition/tests/test_factor_analysis.py
  - decomposition/tests/test_nmf.py
  - decomposition/tests/test_online_lda.py

  - ensemble/tests/test_gradient_boosting.py
  - ensemble/tests/test_gradient_boosting_loss_functions.py
  - ensemble/tests/test_iforest.py

  - feature_selection/tests/test_chi2.py
  - feature_selection/tests/test_feature_select.py
  - feature_selection/tests/test_mutual_info.py
  - feature_selection/tests/test_sequential.py

  - manifold/tests/test_isomap.py
  - manifold/tests/test_locally_linear.py
  - manifold/tests/test_spectral_embedding.py

  - model_selection/tests/test_successive_halving.py

  - neighbors/tests/test_ball_tree.py
  - neighbors/tests/test_kd_tree.py
  - neighbors/tests/test_quad_tree.py

  - tests/test_kernel_approximation.py
  - tests/test_docstring_parameters.py
  - tests/test_dummy.py
  - tests/test_random_projection.py
  - tests/test_naive_bayes.py

  - utils/tests/test_arpack.py
  - utils/tests/test_cython_blas.py
  - utils/tests/test_encode.py
  - utils/tests/test_estimator_html_repr.py
  - utils/tests/test_extmath.py
  - utils/tests/test_fast_dict.py
  - utils/tests/test_mocking.py
  - utils/tests/test_murmurhash.py
  - utils/tests/test_sparsefuncs.py
  - utils/tests/test_utils.py

  # Directory entries (trailing slash): each names a whole sub-package path
  # rather than a single test module.
  - _loss/
  - covariance/
  - cross_decomposition/
  - datasets/
  - ensemble/_hist_gradient_boosting/
  - experimental/
  - feature_extraction/
  - gaussian_process/
  - impute/
  - inspection/
  - neural_network/
  - preprocessing/