Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KMeans out of preview namespace #1634

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions daal4py/sklearn/cluster/_k_means_0_23.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def is_string(s, target_str):
maxIterations=numIterations,
accuracyThreshold=abs_tol,
fptype=X_fptype,
resultsToEvaluate="computeCentroids",
resultsToEvaluate="computeCentroids|computeAssignments|computeExactObjectiveFunction",
md-shafiul-alam marked this conversation as resolved.
Show resolved Hide resolved
method=method,
)

Expand Down Expand Up @@ -581,7 +581,7 @@ def __init__(
verbose=0,
random_state=None,
copy_x=True,
algorithm="lloyd" if sklearn_check_version("1.1") else "auto",
algorithm="lloyd",
):
super(KMeans, self).__init__(
n_clusters=n_clusters,
Expand Down
20 changes: 6 additions & 14 deletions deselected_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ deselected_tests:

# test for KMeans FutureWarning is not removed from sklearn tests suit yet
- cluster/tests/test_k_means.py::test_change_n_init_future_warning[KMeans-10] ==1.4.dev0

# copied from preview, clusters centers for "full" method are different from "elkan", needs investigation
- cluster/tests/test_k_means.py::test_kmeans_elkan_results
- cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[KMeans-dense] <1.2
- cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[42-KMeans-dense] >=1.2
- cluster/tests/test_k_means.py::test_predict_sample_weight_deprecation_warning[KMeans] >=1.3
Comment on lines +35 to +39
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be discussed whether they are a blocker for moving out from preview

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure they are still failing?


# Non-critical, but there are significant numerical differences in doctest results
- pipeline.py::sklearn.pipeline.FeatureUnion
Expand Down Expand Up @@ -353,14 +359,6 @@ deselected_tests:
- tests/test_common.py::test_estimators[LogisticRegression()-check_sample_weights_invariance(kind=zeros)] >=1.4
- tests/test_multioutput.py::test_classifier_chain_fit_and_predict_with_sparse_data >=1.4

# New failing sklearn1.4.1 tests for kmeans associated with incorrect n_iter_ values in daal4py
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-dense] >=1.4
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-sparse_matrix] >=1.4
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-sparse_array] >=1.4
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-dense] >=1.4
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-sparse_matrix] >=1.4
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-sparse_array] >=1.4

Comment on lines -356 to -363
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are somehow now failing on this PR, but wasn't failing in preview in the main branch. Something unusual is going on, because that shouldn't logically be the case.


# --------------------------------------------------------
# No need to test daal4py patching
Expand Down Expand Up @@ -1181,9 +1179,3 @@ gpu:
- tests/test_common.py::test_check_n_features_in_after_fitting[SVC()]
# originated with pca dpctl/dpnp fit, to be re-assesed with pca out-of-preview
- decomposition/tests/test_pca.py::test_pca_n_components_mostly_explained_variance_ratio

preview:
- cluster/tests/test_k_means.py::test_kmeans_elkan_results
- cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[KMeans-dense] <1.2
- cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[42-KMeans-dense] >=1.2
- cluster/tests/test_k_means.py::test_predict_sample_weight_deprecation_warning[KMeans] >=1.3
7 changes: 6 additions & 1 deletion onedal/cluster/kmeans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,12 @@ struct params2desc {
desc.set_cluster_count( params["cluster_count"].cast<std::int64_t>() );
desc.set_accuracy_threshold( params["accuracy_threshold"].cast<Float>() );
desc.set_max_iteration_count( params["max_iteration_count"].cast<std::int64_t>() );

#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240200
auto result_options = params["result_options"].cast<std::string>();
if (result_options == "computeAssignments"){
desc.set_result_options(result_options::compute_assignments);
}
#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240200
return desc;
}
};
Expand Down
6 changes: 4 additions & 2 deletions onedal/cluster/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,15 +142,17 @@ def _check_params_vs_input(
self._n_init = 1
assert self.algorithm == "lloyd"

def _get_onedal_params(self, dtype=np.float32):
def _get_onedal_params(self, dtype=np.float32, result_options=None):
md-shafiul-alam marked this conversation as resolved.
Show resolved Hide resolved
thr = self._tol if hasattr(self, "_tol") else self.tol

return {
"fptype": "float" if dtype == np.float32 else "double",
"method": "by_default",
"seed": -1,
"max_iteration_count": self.max_iter,
"cluster_count": self.n_clusters,
"accuracy_threshold": thr,
"result_options": "" if result_options is None else result_options,
}

def _get_params_and_input(self, X, policy):
Expand Down Expand Up @@ -340,7 +342,7 @@ def _set_cluster_centers(self, cluster_centers):
cluster_centers_ = property(_get_cluster_centers, _set_cluster_centers)

def _predict_raw(self, X_table, module, policy, dtype=np.float32):
params = self._get_onedal_params(dtype)
params = self._get_onedal_params(dtype, result_options="computeAssignments")
md-shafiul-alam marked this conversation as resolved.
Show resolved Hide resolved

result = module.infer(policy, params, self.model_, X_table)

Expand Down
1 change: 0 additions & 1 deletion setup_sklearnex.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@
"sklearnex.neighbors",
"sklearnex.preview",
"sklearnex.preview.covariance",
"sklearnex.preview.cluster",
"sklearnex.preview.decomposition",
"sklearnex.svm",
"sklearnex.utils",
Expand Down
Loading
Loading