uxlfoundation · md-shafiul-alam · Jan 4, 2024 · Jan 4, 2024 · Jan 4, 2024 · Jan 4, 2024
@@ -208,7 +208,7 @@ def is_string(s, target_str):
         maxIterations=numIterations,
         accuracyThreshold=abs_tol,
         fptype=X_fptype,
-        resultsToEvaluate="computeCentroids",
+        resultsToEvaluate="computeCentroids|computeAssignments|computeExactObjectiveFunction",
         method=method,
     )
 
@@ -581,7 +581,7 @@ def __init__(
             verbose=0,
             random_state=None,
             copy_x=True,
-            algorithm="lloyd" if sklearn_check_version("1.1") else "auto",
+            algorithm="lloyd",
         ):
             super(KMeans, self).__init__(
                 n_clusters=n_clusters,

@@ -31,6 +31,12 @@ deselected_tests:
 
   # test for KMeans FutureWarning has not been removed from the sklearn test suite yet
   - cluster/tests/test_k_means.py::test_change_n_init_future_warning[KMeans-10] ==1.4.dev0
+
+  # copied from preview: cluster centers for the "full" method differ from "elkan"; needs investigation
+  - cluster/tests/test_k_means.py::test_kmeans_elkan_results
+  - cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[KMeans-dense] <1.2
+  - cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[42-KMeans-dense] >=1.2
+  - cluster/tests/test_k_means.py::test_predict_sample_weight_deprecation_warning[KMeans] >=1.3
 
   # Non-critical, but there are significant numerical differences in doctest results
   - pipeline.py::sklearn.pipeline.FeatureUnion
@@ -353,14 +359,6 @@ deselected_tests:
   - tests/test_common.py::test_estimators[LogisticRegression()-check_sample_weights_invariance(kind=zeros)] >=1.4
   - tests/test_multioutput.py::test_classifier_chain_fit_and_predict_with_sparse_data >=1.4
 
-  # New failing sklearn1.4.1 tests for kmeans associated with incorrect n_iter_ values in daal4py
-  - cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-dense] >=1.4
-  - cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-sparse_matrix] >=1.4
-  - cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-sparse_array] >=1.4
-  - cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-dense] >=1.4
-  - cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-sparse_matrix] >=1.4
-  - cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-sparse_array] >=1.4
-
 
   # --------------------------------------------------------
   # No need to test daal4py patching
@@ -1181,9 +1179,3 @@ gpu:
   - tests/test_common.py::test_check_n_features_in_after_fitting[SVC()]
   # originated with pca dpctl/dpnp fit, to be re-assessed with pca out-of-preview
   - decomposition/tests/test_pca.py::test_pca_n_components_mostly_explained_variance_ratio
-
-preview:
-  - cluster/tests/test_k_means.py::test_kmeans_elkan_results
-  - cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[KMeans-dense] <1.2
-  - cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[42-KMeans-dense] >=1.2
-  - cluster/tests/test_k_means.py::test_predict_sample_weight_deprecation_warning[KMeans] >=1.3
@@ -68,7 +68,12 @@ struct params2desc {
         desc.set_cluster_count( params["cluster_count"].cast<std::int64_t>() );
         desc.set_accuracy_threshold( params["accuracy_threshold"].cast<Float>() );
         desc.set_max_iteration_count( params["max_iteration_count"].cast<std::int64_t>() );
-
+#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240200
+        auto result_options = params["result_options"].cast<std::string>();
+        if (result_options == "computeAssignments"){
+            desc.set_result_options(result_options::compute_assignments);
+        }
+#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240200
         return desc;
     }
 };

@@ -142,15 +142,17 @@ def _check_params_vs_input(
             self._n_init = 1
         assert self.algorithm == "lloyd"
 
-    def _get_onedal_params(self, dtype=np.float32):
+    def _get_onedal_params(self, dtype=np.float32, result_options=None):
         thr = self._tol if hasattr(self, "_tol") else self.tol
+
         return {
             "fptype": "float" if dtype == np.float32 else "double",
             "method": "by_default",
             "seed": -1,
             "max_iteration_count": self.max_iter,
             "cluster_count": self.n_clusters,
             "accuracy_threshold": thr,
+            "result_options": "" if result_options is None else result_options,
         }
 
     def _get_params_and_input(self, X, policy):
@@ -340,7 +342,7 @@ def _set_cluster_centers(self, cluster_centers):
     cluster_centers_ = property(_get_cluster_centers, _set_cluster_centers)
 
     def _predict_raw(self, X_table, module, policy, dtype=np.float32):
-        params = self._get_onedal_params(dtype)
+        params = self._get_onedal_params(dtype, result_options="computeAssignments")
 
         result = module.infer(policy, params, self.model_, X_table)
 

diff --git a/setup_sklearnex.py b/setup_sklearnex.py
@@ -96,7 +96,6 @@
     "sklearnex.neighbors",
     "sklearnex.preview",
     "sklearnex.preview.covariance",
-    "sklearnex.preview.cluster",
     "sklearnex.preview.decomposition",
     "sklearnex.svm",
     "sklearnex.utils",