From daed5281f95ff485a1d978597dfad93262577add Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 5 Nov 2024 06:00:06 -0800 Subject: [PATCH 01/56] ENH: using only raw inputs for onedal backend --- onedal/_config.py | 1 + onedal/_device_offload.py | 49 +++++++------ onedal/basic_statistics/basic_statistics.py | 32 ++++++--- sklearnex/_config.py | 3 + sklearnex/_device_offload.py | 77 +++++++++++---------- 5 files changed, 93 insertions(+), 69 deletions(-) diff --git a/onedal/_config.py b/onedal/_config.py index 95a4af41b8..12292cc845 100644 --- a/onedal/_config.py +++ b/onedal/_config.py @@ -22,6 +22,7 @@ "target_offload": "auto", "allow_fallback_to_host": False, "allow_sklearn_after_onedal": True, + "use_raw_input": False, } _threadlocal = threading.local() diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 8d4a9d32d7..0532e2d143 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -180,30 +180,33 @@ def support_input_format(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): - if len(args) == 0 and len(kwargs) == 0: - return _run_on_device(func, obj, *args, **kwargs) - data = (*args, *kwargs.values()) - data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) - if queue_param and not ( - "queue" in hostkwargs and hostkwargs["queue"] is not None - ): - hostkwargs["queue"] = data_queue - result = _run_on_device(func, obj, *hostargs, **hostkwargs) - usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) - if usm_iface is not None: - result = _copy_to_usm(data_queue, result) - if dpnp_available and isinstance(data[0], dpnp.ndarray): - result = _convert_to_dpnp(result) + if not get_config()["use_raw_input"] == True: + if len(args) == 0 and len(kwargs) == 0: + return _run_on_device(func, obj, *args, **kwargs) + data = (*args, *kwargs.values()) + data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) + if queue_param and not ( + "queue" in 
hostkwargs and hostkwargs["queue"] is not None + ): + hostkwargs["queue"] = data_queue + result = _run_on_device(func, obj, *hostargs, **hostkwargs) + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) + if usm_iface is not None: + result = _copy_to_usm(data_queue, result) + if dpnp_available and isinstance(data[0], dpnp.ndarray): + result = _convert_to_dpnp(result) + return result + config = get_config() + if not ("transform_output" in config and config["transform_output"]): + input_array_api = getattr(data[0], "__array_namespace__", lambda: None)() + if input_array_api: + input_array_api_device = data[0].device + result = _asarray( + result, input_array_api, device=input_array_api_device + ) return result - config = get_config() - if not ("transform_output" in config and config["transform_output"]): - input_array_api = getattr(data[0], "__array_namespace__", lambda: None)() - if input_array_api: - input_array_api_device = data[0].device - result = _asarray( - result, input_array_api, device=input_array_api_device - ) - return result + else: + return _run_on_device(func, obj, *args, **kwargs) if freefunc: diff --git a/onedal/basic_statistics/basic_statistics.py b/onedal/basic_statistics/basic_statistics.py index 0d429d476c..2dc18b2918 100644 --- a/onedal/basic_statistics/basic_statistics.py +++ b/onedal/basic_statistics/basic_statistics.py @@ -23,6 +23,8 @@ from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _is_csr from ..utils.validation import _check_array +from .._config import _get_config +from ..utils._array_api import _get_sycl_namespace class BaseBasicStatistics(BaseEstimator, metaclass=ABCMeta): @@ -72,23 +74,33 @@ def __init__(self, result_options="all", algorithm="by_default"): super().__init__(result_options, algorithm) def fit(self, data, sample_weight=None, queue=None): + use_raw_input = _get_config()["use_raw_input"] + # All data should use the same sycl queue. 
+ sua_iface, xp, _ = _get_sycl_namespace(data) + # TODO: + # update support_input_format. + if use_raw_input and sua_iface: + queue = data.sycl_queue policy = self._get_policy(queue, data, sample_weight) - - is_csr = _is_csr(data) - - if data is not None and not is_csr: - data = _check_array(data, ensure_2d=False) - if sample_weight is not None: - sample_weight = _check_array(sample_weight, ensure_2d=False) - + if not use_raw_input: + is_csr = _is_csr(data) + + if data is not None and not is_csr: + data = _check_array(data, ensure_2d=False) + if sample_weight is not None: + sample_weight = _check_array(sample_weight, ensure_2d=False) + # TODO + # use xp for dtype. data, sample_weight = _convert_to_supported(policy, data, sample_weight) is_single_dim = data.ndim == 1 - data_table, weights_table = to_table(data, sample_weight) + data_table = to_table(data, sua_iface=sua_iface) + weights_table = to_table(sample_weight, sua_iface=sua_iface) dtype = data.dtype raw_result = self._compute_raw(data_table, weights_table, policy, dtype, is_csr) for opt, raw_value in raw_result.items(): - value = from_table(raw_value).ravel() + # value = from_table(raw_value.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp).reshape(-1) + value = xp.ravel(from_table(raw_value.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp)) if is_single_dim: setattr(self, opt, value[0]) else: diff --git a/sklearnex/_config.py b/sklearnex/_config.py index fafdde6e68..c69e9a2a68 100644 --- a/sklearnex/_config.py +++ b/sklearnex/_config.py @@ -42,6 +42,7 @@ def set_config( target_offload=None, allow_fallback_to_host=None, allow_sklearn_after_onedal=None, + use_raw_input=None, **sklearn_configs, ): """Set global configuration @@ -75,6 +76,8 @@ def set_config( local_config["allow_fallback_to_host"] = allow_fallback_to_host if allow_sklearn_after_onedal is not None: local_config["allow_sklearn_after_onedal"] = allow_sklearn_after_onedal + if use_raw_input is not None: + local_config["use_raw_input"] 
= use_raw_input @contextmanager diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 7e299f07e0..903a38b0c8 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -58,44 +58,49 @@ def _get_backend(obj, queue, method_name, *data): def dispatch(obj, method_name, branches, *args, **kwargs): - q = _get_global_queue() - has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) - has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host(q, *kwargs.values()) - hostkwargs = dict(zip(kwargs.keys(), hostvalues)) - - backend, q, patching_status = _get_backend(obj, q, method_name, *hostargs) - has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs - if backend == "onedal": - # Host args only used before onedal backend call. - # Device will be offloaded when onedal backend will be called. - patching_status.write_log(queue=q, transferred_to_host=False) - return branches[backend](obj, *hostargs, **hostkwargs, queue=q) - if backend == "sklearn": - if ( - "array_api_dispatch" in get_config() - and get_config()["array_api_dispatch"] - and "array_api_support" in obj._get_tags() - and obj._get_tags()["array_api_support"] - and not has_usm_data - ): - # USM ndarrays are also excluded for the fallback Array API. Currently, DPNP.ndarray is - # not compliant with the Array API standard, and DPCTL usm_ndarray Array API is compliant, - # except for the linalg module. There is no guarantee that stock scikit-learn will - # work with such input data. The condition will be updated after DPNP.ndarray and - # DPCTL usm_ndarray enabling for conformance testing and these arrays supportance - # of the fallback cases. - # If `array_api_dispatch` enabled and array api is supported for the stock scikit-learn, - # then raw inputs are used for the fallback. 
- patching_status.write_log(transferred_to_host=False) - return branches[backend](obj, *args, **kwargs) - else: - patching_status.write_log() - return branches[backend](obj, *hostargs, **hostkwargs) - raise RuntimeError( - f"Undefined backend {backend} in " f"{obj.__class__.__name__}.{method_name}" - ) + if not get_config()["use_raw_input"] == True: + q = _get_global_queue() + has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) + has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host(q, *kwargs.values()) + hostkwargs = dict(zip(kwargs.keys(), hostvalues)) + + backend, q, patching_status = _get_backend(obj, q, method_name, *hostargs) + has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs + if backend == "onedal": + # Host args only used before onedal backend call. + # Device will be offloaded when onedal backend will be called. + patching_status.write_log(queue=q, transferred_to_host=False) + return branches[backend](obj, *hostargs, **hostkwargs, queue=q) + if backend == "sklearn": + if ( + "array_api_dispatch" in get_config() + and get_config()["array_api_dispatch"] + and "array_api_support" in obj._get_tags() + and obj._get_tags()["array_api_support"] + and not has_usm_data + ): + # USM ndarrays are also excluded for the fallback Array API. Currently, DPNP.ndarray is + # not compliant with the Array API standard, and DPCTL usm_ndarray Array API is compliant, + # except for the linalg module. There is no guarantee that stock scikit-learn will + # work with such input data. The condition will be updated after DPNP.ndarray and + # DPCTL usm_ndarray enabling for conformance testing and these arrays supportance + # of the fallback cases. + # If `array_api_dispatch` enabled and array api is supported for the stock scikit-learn, + # then raw inputs are used for the fallback. 
+ patching_status.write_log(transferred_to_host=False) + return branches[backend](obj, *args, **kwargs) + else: + patching_status.write_log() + return branches[backend](obj, *hostargs, **hostkwargs) + raise RuntimeError( + f"Undefined backend {backend} in " f"{obj.__class__.__name__}.{method_name}" + ) + else: + return branches["onedal"](obj, *args, **kwargs, queue=q) +# TODO: +# wrap output. def wrap_output_data(func): """ Converts and moves the output arrays of the decorated function From 1be2ffb98f6e4c19c9acb29e0f2ebf1ffe8b3bf0 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 5 Nov 2024 06:02:07 -0800 Subject: [PATCH 02/56] minor fix --- sklearnex/_device_offload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 903a38b0c8..8fb33e2c7f 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -96,7 +96,7 @@ def dispatch(obj, method_name, branches, *args, **kwargs): f"Undefined backend {backend} in " f"{obj.__class__.__name__}.{method_name}" ) else: - return branches["onedal"](obj, *args, **kwargs, queue=q) + return branches["onedal"](obj, *args, **kwargs) # TODO: From a23b67772baceeef053022aeff4e20fd22f44691 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 5 Nov 2024 06:03:53 -0800 Subject: [PATCH 03/56] lin --- onedal/_device_offload.py | 4 +++- onedal/basic_statistics/basic_statistics.py | 12 ++++++++---- sklearnex/_device_offload.py | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 0532e2d143..e947cd1d7d 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -198,7 +198,9 @@ def wrapper_impl(obj, *args, **kwargs): return result config = get_config() if not ("transform_output" in config and config["transform_output"]): - input_array_api = getattr(data[0], "__array_namespace__", lambda: None)() + input_array_api = getattr( + data[0], 
"__array_namespace__", lambda: None + )() if input_array_api: input_array_api_device = data[0].device result = _asarray( diff --git a/onedal/basic_statistics/basic_statistics.py b/onedal/basic_statistics/basic_statistics.py index 2dc18b2918..24615534fc 100644 --- a/onedal/basic_statistics/basic_statistics.py +++ b/onedal/basic_statistics/basic_statistics.py @@ -19,12 +19,12 @@ import numpy as np +from .._config import _get_config from ..common._base import BaseEstimator from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _is_csr -from ..utils.validation import _check_array -from .._config import _get_config from ..utils._array_api import _get_sycl_namespace +from ..utils.validation import _check_array class BaseBasicStatistics(BaseEstimator, metaclass=ABCMeta): @@ -74,7 +74,7 @@ def __init__(self, result_options="all", algorithm="by_default"): super().__init__(result_options, algorithm) def fit(self, data, sample_weight=None, queue=None): - use_raw_input = _get_config()["use_raw_input"] + use_raw_input = _get_config()["use_raw_input"] # All data should use the same sycl queue. 
sua_iface, xp, _ = _get_sycl_namespace(data) # TODO: @@ -100,7 +100,11 @@ def fit(self, data, sample_weight=None, queue=None): raw_result = self._compute_raw(data_table, weights_table, policy, dtype, is_csr) for opt, raw_value in raw_result.items(): # value = from_table(raw_value.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp).reshape(-1) - value = xp.ravel(from_table(raw_value.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp)) + value = xp.ravel( + from_table( + raw_value.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp + ) + ) if is_single_dim: setattr(self, opt, value[0]) else: diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 8fb33e2c7f..76da097fe9 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -63,7 +63,7 @@ def dispatch(obj, method_name, branches, *args, **kwargs): has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host(q, *kwargs.values()) hostkwargs = dict(zip(kwargs.keys(), hostvalues)) - + backend, q, patching_status = _get_backend(obj, q, method_name, *hostargs) has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs if backend == "onedal": From 664e140d207424bf66ba0b2fbc55df959c2a160d Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 5 Nov 2024 08:22:54 -0800 Subject: [PATCH 04/56] fix use_raw_input True/False with dpctl tensor on device --- onedal/_device_offload.py | 19 +++++++---- onedal/basic_statistics/basic_statistics.py | 36 ++++++++------------- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index e947cd1d7d..437d2e0d02 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -180,9 +180,17 @@ def support_input_format(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): - if not get_config()["use_raw_input"] == True: - if len(args) == 0 and len(kwargs)
== 0: - return _run_on_device(func, obj, *args, **kwargs) + if _get_config()["use_raw_input"] is True: + if "queue" not in kwargs: + usm_iface = getattr(args[0], "__sycl_usm_array_interface__", None) + data_queue = usm_iface["syclobj"] if usm_iface is not None else data_queue + kwargs["queue"] = data_queue + return _run_on_device(func, obj, *args, **kwargs) + + elif len(args) == 0 and len(kwargs) == 0: + return _run_on_device(func, obj, *args, **kwargs) + + else: data = (*args, *kwargs.values()) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) if queue_param and not ( @@ -196,8 +204,7 @@ def wrapper_impl(obj, *args, **kwargs): if dpnp_available and isinstance(data[0], dpnp.ndarray): result = _convert_to_dpnp(result) return result - config = get_config() - if not ("transform_output" in config and config["transform_output"]): + if not get_config().get("transform_output", False): input_array_api = getattr( data[0], "__array_namespace__", lambda: None )() @@ -207,8 +214,6 @@ def wrapper_impl(obj, *args, **kwargs): result, input_array_api, device=input_array_api_device ) return result - else: - return _run_on_device(func, obj, *args, **kwargs) if freefunc: diff --git a/onedal/basic_statistics/basic_statistics.py b/onedal/basic_statistics/basic_statistics.py index 24615534fc..4452bb11b7 100644 --- a/onedal/basic_statistics/basic_statistics.py +++ b/onedal/basic_statistics/basic_statistics.py @@ -74,41 +74,33 @@ def __init__(self, result_options="all", algorithm="by_default"): super().__init__(result_options, algorithm) def fit(self, data, sample_weight=None, queue=None): - use_raw_input = _get_config()["use_raw_input"] - # All data should use the same sycl queue. - sua_iface, xp, _ = _get_sycl_namespace(data) - # TODO: - # update support_input_format. 
- if use_raw_input and sua_iface: - queue = data.sycl_queue policy = self._get_policy(queue, data, sample_weight) - if not use_raw_input: - is_csr = _is_csr(data) + is_csr = _is_csr(data) + + use_raw_input = _get_config().get("use_raw_input", False) is True + + # All data should use the same sycl queue + if use_raw_input and _get_sycl_namespace(data)[0] is not None: + queue = data.sycl_queue + if not use_raw_input: if data is not None and not is_csr: data = _check_array(data, ensure_2d=False) if sample_weight is not None: sample_weight = _check_array(sample_weight, ensure_2d=False) + # TODO # use xp for dtype. data, sample_weight = _convert_to_supported(policy, data, sample_weight) - is_single_dim = data.ndim == 1 - data_table = to_table(data, sua_iface=sua_iface) - weights_table = to_table(sample_weight, sua_iface=sua_iface) + + data_table = to_table(data, sua_iface=_get_sycl_namespace(data)[0]) + weights_table = to_table(sample_weight, sua_iface=_get_sycl_namespace(sample_weight)[0]) dtype = data.dtype raw_result = self._compute_raw(data_table, weights_table, policy, dtype, is_csr) for opt, raw_value in raw_result.items(): - # value = from_table(raw_value.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp).reshape(-1) - value = xp.ravel( - from_table( - raw_value.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp - ) - ) - if is_single_dim: - setattr(self, opt, value[0]) - else: - setattr(self, opt, value) + value = from_table(raw_value).ravel() + setattr(self, opt, value[0]) if data.ndim == 1 else setattr(self, opt, value) return self From 518dceb471a36f348d26dd3f747013d61d78f59d Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 5 Nov 2024 10:31:21 -0800 Subject: [PATCH 05/56] Add hacks to kmeans --- onedal/basic_statistics/basic_statistics.py | 3 +- onedal/cluster/kmeans.py | 32 ++++++++++++++------- onedal/datatypes/_data_conversion.py | 2 +- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git 
a/onedal/basic_statistics/basic_statistics.py b/onedal/basic_statistics/basic_statistics.py index 4452bb11b7..841e905e4c 100644 --- a/onedal/basic_statistics/basic_statistics.py +++ b/onedal/basic_statistics/basic_statistics.py @@ -14,7 +14,6 @@ # limitations under the License. # ============================================================================== -import warnings from abc import ABCMeta, abstractmethod import numpy as np @@ -74,7 +73,6 @@ def __init__(self, result_options="all", algorithm="by_default"): super().__init__(result_options, algorithm) def fit(self, data, sample_weight=None, queue=None): - policy = self._get_policy(queue, data, sample_weight) is_csr = _is_csr(data) use_raw_input = _get_config().get("use_raw_input", False) is True @@ -91,6 +89,7 @@ def fit(self, data, sample_weight=None, queue=None): # TODO # use xp for dtype. + policy = self._get_policy(queue, data, sample_weight) data, sample_weight = _convert_to_supported(policy, data, sample_weight) data_table = to_table(data, sua_iface=_get_sycl_namespace(data)[0]) diff --git a/onedal/cluster/kmeans.py b/onedal/cluster/kmeans.py index 14a0301bcd..a6f5966d1b 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -36,6 +36,8 @@ from ..common._mixin import ClusterMixin, TransformerMixin from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array, _is_arraylike_not_scalar, _is_csr +from ..utils._array_api import _get_sycl_namespace +from .._config import _get_config class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC): @@ -80,7 +82,7 @@ def _get_kmeans_init(self, cluster_count, seed, algorithm): def _get_basic_statistics_backend(self, result_options): return BasicStatistics(result_options) - def _tolerance(self, X_table, rtol, is_csr, policy, dtype): + def _tolerance(self, X_table, rtol, is_csr, policy, dtype, sua_iface): """Compute absolute tolerance from the relative tolerance""" if rtol == 0.0: return rtol @@ 
-94,7 +96,7 @@ def _tolerance(self, X_table, rtol, is_csr, policy, dtype): return mean_var * rtol def _check_params_vs_input( - self, X_table, is_csr, policy, default_n_init=10, dtype=np.float32 + self, X_table, is_csr, policy, default_n_init=10, dtype=np.float32, sua_iface=None ): # n_clusters if X_table.shape[0] < self.n_clusters: @@ -103,7 +105,7 @@ def _check_params_vs_input( ) # tol - self._tol = self._tolerance(X_table, self.tol, is_csr, policy, dtype) + self._tol = self._tolerance(X_table, self.tol, is_csr, policy, dtype, sua_iface) # n-init # TODO(1.4): Remove @@ -261,18 +263,28 @@ def _fit_backend( ) def _fit(self, X, module, queue=None): - policy = self._get_policy(queue, X) is_csr = _is_csr(X) - X = _check_array( - X, dtype=[np.float64, np.float32], accept_sparse="csr", force_all_finite=False - ) + + use_raw_input = _get_config().get("use_raw_input") is True + if use_raw_input and _get_sycl_namespace(X)[0] is not None: + queue = X.sycl_queue + + if not use_raw_input: + X = _check_array( + X, dtype=[np.float64, np.float32], accept_sparse="csr", force_all_finite=False + ) + + policy = self._get_policy(queue, X) + X = _convert_to_supported(policy, X) dtype = get_dtype(X) - X_table = to_table(X) + sua_iface = _get_sycl_namespace(X)[0] + X_table = to_table(X, sua_iface=sua_iface) - self._check_params_vs_input(X_table, is_csr, policy, dtype=dtype) + self._check_params_vs_input(X_table, is_csr, policy, dtype=dtype, sua_iface=sua_iface) - params = self._get_onedal_params(is_csr, dtype) + # not used? 
+ # params = self._get_onedal_params(is_csr, dtype) self.n_features_in_ = X_table.column_count diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index e52c4c7f47..b5540a3fa2 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -101,7 +101,7 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): def convert_one_to_table(arg, sua_iface=None): # Note: currently only oneDAL homogen tables are supported and the # contiuginity of the input array should be checked in advance. - if sua_iface: + if arg is not None and sua_iface: return _backend.sua_iface_to_table(arg) if not _is_csr(arg): From df9d9305aac8f9b80d2bd9af71bdaf1ac122058b Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 5 Nov 2024 10:36:21 -0800 Subject: [PATCH 06/56] Basic statistics online not tested properly yet --- .../incremental_basic_statistics.py | 51 +++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index 4935a57a47..ef4f89bd19 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -18,8 +18,10 @@ from daal4py.sklearn._utils import get_dtype +from .._config import _get_config from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array +from ..utils._array_api import _get_sycl_namespace from .basic_statistics import BaseBasicStatistics @@ -93,26 +95,39 @@ def partial_fit(self, X, weights=None, queue=None): self : object Returns the instance itself. """ + use_raw_input = _get_config().get("use_raw_input", False) is True + sua_iface, xp, _ = _get_sycl_namespace(X) + # Saving input array namespace and sua_iface, that will be used in + # finalize_fit. 
+ self._input_sua_iface = sua_iface + self._input_xp = xp + + # All data should use the same sycl queue + if use_raw_input and sua_iface is not None: + queue = X.sycl_queue + self._queue = queue policy = self._get_policy(queue, X) X, weights = _convert_to_supported(policy, X, weights) - X = _check_array( - X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False - ) - if weights is not None: - weights = _check_array( - weights, - dtype=[np.float64, np.float32], - ensure_2d=False, - force_all_finite=False, + if not use_raw_input: + X = _check_array( + X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False ) + if weights is not None: + weights = _check_array( + weights, + dtype=[np.float64, np.float32], + ensure_2d=False, + force_all_finite=False, + ) if not hasattr(self, "_onedal_params"): dtype = get_dtype(X) self._onedal_params = self._get_onedal_params(False, dtype=dtype) - X_table, weights_table = to_table(X, weights) + X_table = to_table(X, sua_iface=sua_iface) + weights_table = to_table(weights, sua_iface=_get_sycl_namespace(weights)[0]) self._partial_result = self._get_backend( "basic_statistics", None, @@ -140,10 +155,8 @@ def finalize_fit(self, queue=None): Returns the instance itself. 
""" - if queue is not None: - policy = self._get_policy(queue) - else: - policy = self._get_policy(self._queue) + queue = queue if queue is not None else self._queue + policy = self._get_policy(queue) result = self._get_backend( "basic_statistics", @@ -155,6 +168,14 @@ def finalize_fit(self, queue=None): ) options = self._get_result_options(self.options).split("|") for opt in options: - setattr(self, opt, from_table(getattr(result, opt)).ravel()) + opt_value = self._input_xp.ravel( + from_table( + getattr(result, opt), + sua_iface=self._input_sua_iface, + sycl_queue=queue, + xp=self._input_xp, + ) + ) + setattr(self, opt, opt_value) return self From 3ef345c988044c66d7a21648c9280c90e167294d Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 5 Nov 2024 11:56:45 -0800 Subject: [PATCH 07/56] Covariance support --- onedal/covariance/covariance.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/onedal/covariance/covariance.py b/onedal/covariance/covariance.py index a6fbbd96c0..7d5b8dfa16 100644 --- a/onedal/covariance/covariance.py +++ b/onedal/covariance/covariance.py @@ -18,11 +18,13 @@ import numpy as np from daal4py.sklearn._utils import daal_check_version, get_dtype -from onedal.utils import _check_array from ..common._base import BaseEstimator from ..common.hyperparameters import get_hyperparameters +from .._config import _get_config from ..datatypes import _convert_to_supported, from_table, to_table +from ..utils._array_api import _get_sycl_namespace +from ..utils.validation import _check_array class BaseEmpiricalCovariance(BaseEstimator, metaclass=ABCMeta): @@ -93,9 +95,19 @@ def fit(self, X, y=None, queue=None): self : object Returns the instance itself. 
""" + use_raw_input = _get_config()["use_raw_input"] + sua_iface, xp, _ = _get_sycl_namespace(X) + if use_raw_input and sua_iface: + queue = X.sycl_queue + policy = self._get_policy(queue, X) - X = _check_array(X, dtype=[np.float64, np.float32]) + + if not use_raw_input: + X = _check_array(X, dtype=[np.float64, np.float32]) + X = _convert_to_supported(policy, X) + X_table = to_table(X, sua_iface=sua_iface) + dtype = get_dtype(X) params = self._get_onedal_params(dtype) hparams = get_hyperparameters("covariance", "compute") @@ -107,19 +119,19 @@ def fit(self, X, y=None, queue=None): policy, params, hparams.backend, - to_table(X), + X_table, ) else: result = self._get_backend( - "covariance", None, "compute", policy, params, to_table(X) + "covariance", None, "compute", policy, params, X_table ) if daal_check_version((2024, "P", 1)) or (not self.bias): - self.covariance_ = from_table(result.cov_matrix) + self.covariance_ = from_table(result.cov_matrix, sua_iface=sua_iface, sycl_queue=queue, xp=xp) else: self.covariance_ = ( - from_table(result.cov_matrix) * (X.shape[0] - 1) / X.shape[0] + from_table(result.cov_matrix, sua_iface=sua_iface, sycl_queue=queue, xp=xp) * (X.shape[0] - 1) / X.shape[0] ) - self.location_ = from_table(result.means).ravel() + self.location_ = xp.reshape(from_table(result.means, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1) return self From 66d7b2df3e752292cbd9cd6e8776081d249fb2b2 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 5 Nov 2024 12:36:04 -0800 Subject: [PATCH 08/56] DBSCAN support --- onedal/cluster/dbscan.py | 44 ++++++++++++++++++++++++++++--------- sklearnex/cluster/dbscan.py | 13 +++++++---- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index f91325b65c..8fe2fb3e9c 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -18,10 +18,12 @@ from daal4py.sklearn._utils import get_dtype, make2d +from .._config import _get_config from 
..common._base import BaseEstimator from ..common._mixin import ClusterMixin from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array +from ..utils._array_api import _get_sycl_namespace class BaseDBSCAN(BaseEstimator, ClusterMixin): @@ -58,9 +60,17 @@ def _get_onedal_params(self, dtype=np.float32): def _fit(self, X, y, sample_weight, module, queue): policy = self._get_policy(queue, X) - X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) - sample_weight = make2d(sample_weight) if sample_weight is not None else None - X = make2d(X) + use_raw_input = _get_config().get("use_raw_input", False) is True + sua_iface, xp, _ = _get_sycl_namespace(X) + + # All data should use the same sycl queue + if use_raw_input and sua_iface is not None: + queue = X.sycl_queue + + if not use_raw_input: + X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) + sample_weight = make2d(sample_weight) if sample_weight is not None else None + X = make2d(X) types = [np.float32, np.float64] if get_dtype(X) not in types: @@ -68,16 +78,30 @@ def _fit(self, X, y, sample_weight, module, queue): X = _convert_to_supported(policy, X) dtype = get_dtype(X) params = self._get_onedal_params(dtype) - result = module.compute(policy, params, to_table(X), to_table(sample_weight)) - self.labels_ = from_table(result.responses).ravel() + X_table = to_table(X, sua_iface=sua_iface) + weights_table = to_table( + sample_weight, sua_iface=_get_sycl_namespace(sample_weight)[0] + ) + + result = module.compute(policy, params, X_table, weights_table) + + self.labels_ = xp.reshape( + from_table(result.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1 + ) if result.core_observation_indices is not None: - self.core_sample_indices_ = from_table( - result.core_observation_indices - ).ravel() + self.core_sample_indices_ = xp.reshape( + from_table( + result.core_observation_indices, + sua_iface=sua_iface, + sycl_queue=queue, + xp=xp, + 
), + -1, + ) else: - self.core_sample_indices_ = np.array([], dtype=np.intc) - self.components_ = np.take(X, self.core_sample_indices_, axis=0) + self.core_sample_indices_ = xp.array([], dtype=xp.int32) + self.components_ = xp.take(X, self.core_sample_indices_, axis=0) self.n_features_in_ = X.shape[1] return self diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index ef5f6b78d9..fb99e08399 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -27,6 +27,7 @@ from .._device_offload import dispatch from .._utils import PatchingConditionsChain +from ..config import get_config if sklearn_check_version("1.1") and not sklearn_check_version("1.2"): from sklearn.utils import check_scalar @@ -89,8 +90,10 @@ def __init__( self.n_jobs = n_jobs def _onedal_fit(self, X, y, sample_weight=None, queue=None): - if sklearn_check_version("1.0"): - X = validate_data(self, X, force_all_finite=False) + use_raw_input = _get_config().get("use_raw_input", False) is True + if use_raw_input: + if sklearn_check_version("1.0"): + X = validate_data(self, X, force_all_finite=False) onedal_params = { "eps": self.eps, @@ -140,6 +143,7 @@ def _onedal_gpu_supported(self, method_name, *data): return self._onedal_supported(method_name, *data) def fit(self, X, y=None, sample_weight=None): + use_raw_input = _get_config().get("use_raw_input", False) is True if sklearn_check_version("1.2"): self._validate_params() elif sklearn_check_version("1.1"): @@ -178,8 +182,9 @@ def fit(self, X, y=None, sample_weight=None): if self.eps <= 0.0: raise ValueError(f"eps == {self.eps}, must be > 0.0.") - if sample_weight is not None: - sample_weight = _check_sample_weight(sample_weight, X) + if use_raw_input: + if sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, X) dispatch( self, "fit", From 1350c100e7becd173a11a0361ccd815b15350b73 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 5 Nov 2024 12:58:44 -0800 Subject: [PATCH 09/56] minor fix 
for dbscan --- sklearnex/cluster/dbscan.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index fb99e08399..e3fe062098 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -90,7 +90,7 @@ def __init__( self.n_jobs = n_jobs def _onedal_fit(self, X, y, sample_weight=None, queue=None): - use_raw_input = _get_config().get("use_raw_input", False) is True + use_raw_input = get_config().get("use_raw_input", False) is True if use_raw_input: if sklearn_check_version("1.0"): X = validate_data(self, X, force_all_finite=False) @@ -143,7 +143,7 @@ def _onedal_gpu_supported(self, method_name, *data): return self._onedal_supported(method_name, *data) def fit(self, X, y=None, sample_weight=None): - use_raw_input = _get_config().get("use_raw_input", False) is True + use_raw_input = get_config().get("use_raw_input", False) is True if sklearn_check_version("1.2"): self._validate_params() elif sklearn_check_version("1.1"): From 8aaaa702889b66aa3582d15c597c337487878711 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 5 Nov 2024 13:38:53 -0800 Subject: [PATCH 10/56] minor fix for DBSCAN --- sklearnex/cluster/dbscan.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index e3fe062098..2f57fbbd38 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -91,7 +91,7 @@ def __init__( def _onedal_fit(self, X, y, sample_weight=None, queue=None): use_raw_input = get_config().get("use_raw_input", False) is True - if use_raw_input: + if not use_raw_input: if sklearn_check_version("1.0"): X = validate_data(self, X, force_all_finite=False) @@ -182,7 +182,7 @@ def fit(self, X, y=None, sample_weight=None): if self.eps <= 0.0: raise ValueError(f"eps == {self.eps}, must be > 0.0.") - if use_raw_input: + if not use_raw_input: if sample_weight is not None: sample_weight = 
_check_sample_weight(sample_weight, X) dispatch( From f0d92aecce0bc08688675ce48a10122b05c2b585 Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Tue, 5 Nov 2024 14:12:20 -0800 Subject: [PATCH 11/56] Apply raw input for batch linear and logistic regression --- onedal/linear_model/linear_model.py | 87 ++++++++++------ onedal/linear_model/logistic_regression.py | 109 ++++++++++++++------- onedal/utils/_dpep_helpers.py | 15 +++ 3 files changed, 144 insertions(+), 67 deletions(-) diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index cde64cd5ed..196292a03f 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -21,11 +21,13 @@ from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d +from .._config import _get_config from ..common._base import BaseEstimator from ..common._estimator_checks import _check_is_fitted from ..common.hyperparameters import get_hyperparameters from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array, _check_n_features, _check_X_y, _num_features +from ..utils._array_api import _get_sycl_namespace class BaseLinearRegression(BaseEstimator, metaclass=ABCMeta): @@ -119,11 +121,19 @@ def predict(self, X, queue=None): _check_is_fitted(self) + sua_iface, xp, _ = _get_sycl_namespace(X) + if xp is None: + xp = np + use_raw_input = _get_config().get("use_raw_input") is True + policy = self._get_policy(queue, X) - X = _check_array( - X, dtype=[np.float64, np.float32], force_all_finite=False, ensure_2d=False - ) + if not use_raw_input: + X = _check_array( + X, dtype=[np.float64, np.float32], force_all_finite=False, ensure_2d=False + ) + X = make2d(X) + _check_n_features(self, X, False) if hasattr(self, "_onedal_model"): @@ -131,16 +141,15 @@ def predict(self, X, queue=None): else: model = self._create_model(policy) - X = make2d(X) X = _convert_to_supported(policy, X) params = self._get_onedal_params(get_dtype(X)) - 
X_table = to_table(X) + X_table = to_table(X, sua_iface=sua_iface) result = module.infer(policy, params, model, X_table) - y = from_table(result.responses) + y = from_table(result.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp) if y.shape[1] == 1 and self.coef_.ndim == 1: - return y.ravel() + return xp.reshape(y, (-1,)) else: return y @@ -194,18 +203,24 @@ def fit(self, X, y, queue=None): """ module = self._get_backend("linear_model", "regression") - # TODO Fix _check_X_y to make sure this conversion is there - if not isinstance(X, np.ndarray): - X = np.asarray(X) + sua_iface, xp, _ = _get_sycl_namespace(X) + if xp is None: + xp = np + use_raw_input = _get_config().get("use_raw_input") is True + + if not use_raw_input: + # TODO Fix _check_X_y to make sure this conversion is there + if not isinstance(X, np.ndarray): + X = np.asarray(X) - dtype = get_dtype(X) - if dtype not in [np.float32, np.float64]: - dtype = np.float64 - X = X.astype(dtype, copy=self.copy_X) + dtype = get_dtype(X) + if dtype not in [np.float32, np.float64]: + dtype = np.float64 + X = X.astype(dtype, copy=self.copy_X) - y = np.asarray(y).astype(dtype=dtype) + y = np.asarray(y).astype(dtype=dtype) - X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True) + X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True) policy = self._get_policy(queue, X, y) @@ -213,7 +228,7 @@ def fit(self, X, y, queue=None): X, y = _convert_to_supported(policy, X, y) params = self._get_onedal_params(get_dtype(X)) - X_table, y_table = to_table(X, y) + X_table, y_table = to_table(X, y, sua_iface=sua_iface) hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: @@ -223,14 +238,16 @@ def fit(self, X, y, queue=None): self._onedal_model = result.model - packed_coefficients = from_table(result.model.packed_coefficients) + packed_coefficients = from_table( + result.model.packed_coefficients, sua_iface=sua_iface, sycl_queue=queue, xp=xp + ) 
self.coef_, self.intercept_ = ( packed_coefficients[:, 1:], packed_coefficients[:, 0], ) if self.coef_.shape[0] == 1 and y.ndim == 1: - self.coef_ = self.coef_.ravel() + self.coef_ = xp.reshape(self.coef_, (-1,)) self.intercept_ = self.intercept_[0] return self @@ -293,17 +310,23 @@ def fit(self, X, y, queue=None): """ module = self._get_backend("linear_model", "regression") - X = _check_array( - X, - dtype=[np.float64, np.float32], - force_all_finite=False, - ensure_2d=False, - copy=self.copy_X, - ) + sua_iface, xp, _ = _get_sycl_namespace(X) + if xp is None: + xp = np + use_raw_input = _get_config().get("use_raw_input") is True - y = np.asarray(y).astype(dtype=get_dtype(X)) + if not use_raw_input: + X = _check_array( + X, + dtype=[np.float64, np.float32], + force_all_finite=False, + ensure_2d=False, + copy=self.copy_X, + ) - X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True) + y = np.asarray(y).astype(dtype=get_dtype(X)) + + X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True) policy = self._get_policy(queue, X, y) @@ -311,19 +334,21 @@ def fit(self, X, y, queue=None): X, y = _convert_to_supported(policy, X, y) params = self._get_onedal_params(get_dtype(X)) - X_table, y_table = to_table(X, y) + X_table, y_table = to_table(X, y, sua_iface=sua_iface) result = module.train(policy, params, X_table, y_table) self._onedal_model = result.model - packed_coefficients = from_table(result.model.packed_coefficients) + packed_coefficients = from_table( + result.model.packed_coefficients, sua_iface=sua_iface, sycl_queue=queue, xp=xp + ) self.coef_, self.intercept_ = ( packed_coefficients[:, 1:], packed_coefficients[:, 0], ) if self.coef_.shape[0] == 1 and y.ndim == 1: - self.coef_ = self.coef_.ravel() + self.coef_ = xp.reshape(self.coef_, (-1,)) self.intercept_ = self.intercept_[0] return self diff --git a/onedal/linear_model/logistic_regression.py b/onedal/linear_model/logistic_regression.py index 518ba03d15..730cfd8cb5 100644 --- 
a/onedal/linear_model/logistic_regression.py +++ b/onedal/linear_model/logistic_regression.py @@ -21,6 +21,7 @@ from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d +from .._config import _get_config from ..common._base import BaseEstimator as onedal_BaseEstimator from ..common._estimator_checks import _check_is_fitted from ..common._mixin import ClassifierMixin @@ -33,6 +34,8 @@ _num_features, _type_of_target, ) +from ..utils._array_api import _get_sycl_namespace +from ..utils._dpep_helpers import get_unique_values_with_dpep class BaseLogisticRegression(onedal_BaseEstimator, metaclass=ABCMeta): @@ -63,29 +66,38 @@ def _get_onedal_params(self, is_csr, dtype=np.float32): } def _fit(self, X, y, module, queue): + use_raw_input = _get_config().get("use_raw_input") is True + if use_raw_input and _get_sycl_namespace(X)[0] is not None: + queue = X.sycl_queue + sparsity_enabled = daal_check_version((2024, "P", 700)) - X, y = _check_X_y( - X, - y, - accept_sparse=sparsity_enabled, - force_all_finite=True, - accept_2d_y=False, - dtype=[np.float64, np.float32], - ) - is_csr = _is_csr(X) + if not use_raw_input: + X, y = _check_X_y( + X, + y, + accept_sparse=sparsity_enabled, + force_all_finite=True, + accept_2d_y=False, + dtype=[np.float64, np.float32], + ) + if _type_of_target(y) != "binary": + raise ValueError("Only binary classification is supported") + + self.classes_, y = np.unique(y, return_inverse=True) + y = y.astype(dtype=np.int32) + else: + self.classes_ = get_unique_values_with_dpep(y) + n_classes = len(self.classes_) + if n_classes != 2: + raise ValueError("Only binary classification is supported") self.n_features_in_ = _num_features(X, fallback_1d=True) - - if _type_of_target(y) != "binary": - raise ValueError("Only binary classification is supported") - - self.classes_, y = np.unique(y, return_inverse=True) - y = y.astype(dtype=np.int32) - + is_csr = _is_csr(X) policy = self._get_policy(queue, X, y) X, y = _convert_to_supported(policy, X, y) 
params = self._get_onedal_params(is_csr, get_dtype(X)) - X_table, y_table = to_table(X, y) + sua_iface = _get_sycl_namespace(X, y)[0] + X_table, y_table = to_table(X, y, sua_iface=sua_iface) result = module.train(policy, params, X_table, y_table) @@ -152,22 +164,29 @@ def _create_model(self, module, policy): return m - def _infer(self, X, module, queue): + def _infer(self, X, module, queue, sua_iface): _check_is_fitted(self) + + use_raw_input = _get_config().get("use_raw_input") is True + if use_raw_input and _get_sycl_namespace(X)[0] is not None: + queue = X.sycl_queue + sparsity_enabled = daal_check_version((2024, "P", 700)) - X = _check_array( - X, - dtype=[np.float64, np.float32], - accept_sparse=sparsity_enabled, - force_all_finite=True, - ensure_2d=False, - accept_large_sparse=sparsity_enabled, - ) - is_csr = _is_csr(X) + if not use_raw_input: + X = _check_array( + X, + dtype=[np.float64, np.float32], + accept_sparse=sparsity_enabled, + force_all_finite=True, + ensure_2d=False, + accept_large_sparse=sparsity_enabled, + ) + X = make2d(X) + _check_n_features(self, X, False) + is_csr = _is_csr(X) - X = make2d(X) policy = self._get_policy(queue, X) if hasattr(self, "_onedal_model"): @@ -178,26 +197,44 @@ def _infer(self, X, module, queue): X = _convert_to_supported(policy, X) params = self._get_onedal_params(is_csr, get_dtype(X)) - X_table = to_table(X) + X_table = to_table(X, sua_iface=sua_iface) + result = module.infer(policy, params, model, X_table) return result def _predict(self, X, module, queue): - result = self._infer(X, module, queue) - y = from_table(result.responses) - y = np.take(self.classes_, y.ravel(), axis=0) + use_raw_input = _get_config().get("use_raw_input") is True + sua_iface, xp, _ = _get_sycl_namespace(X) + if xp is None: + xp = np + if use_raw_input and sua_iface is not None: + queue = X.sycl_queue + + result = self._infer(X, module, queue, sua_iface) + y = from_table(result.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp) + y = 
xp.take(xp.asarray(self.classes_), xp.reshape(y, (-1,)), axis=0) return y def _predict_proba(self, X, module, queue): - result = self._infer(X, module, queue) + use_raw_input = _get_config().get("use_raw_input") is True + sua_iface, xp, _ = _get_sycl_namespace(X) + if xp is None: + xp = np + if use_raw_input and sua_iface is not None: + queue = X.sycl_queue + + result = self._infer(X, module, queue, sua_iface) - y = from_table(result.probabilities) + y = from_table(result.probabilities, sua_iface=sua_iface, sycl_queue=queue, xp=xp) y = y.reshape(-1, 1) - return np.hstack([1 - y, y]) + return xp.hstack([1 - y, y]) def _predict_log_proba(self, X, module, queue): + _, xp, _ = _get_sycl_namespace(X) + if xp is None: + xp = np y_proba = self._predict_proba(X, module, queue) - return np.log(y_proba) + return xp.log(y_proba) class LogisticRegression(ClassifierMixin, BaseLogisticRegression): diff --git a/onedal/utils/_dpep_helpers.py b/onedal/utils/_dpep_helpers.py index 3494f71d6d..f9c4cbe56e 100644 --- a/onedal/utils/_dpep_helpers.py +++ b/onedal/utils/_dpep_helpers.py @@ -54,3 +54,18 @@ def is_dpnp_available(version=None): dpctl_available = is_dpctl_available() dpnp_available = is_dpnp_available() + + +if dpnp_available: + import dpnp +if dpctl_available: + import dpctl.tensor as dpt + + +def get_unique_values_with_dpep(X): + if dpnp_available: + return dpnp.unique(X) + elif dpctl_available: + return dpt.unique_values(X) + else: + raise RuntimeError("No DPEP package available to provide `unique` function.") From 3b58beb1e40dd0c97609915fc540a3db0ba6a5fc Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Tue, 5 Nov 2024 14:12:47 -0800 Subject: [PATCH 12/56] Apply linters --- onedal/_device_offload.py | 4 +++- onedal/basic_statistics/basic_statistics.py | 4 +++- onedal/cluster/kmeans.py | 11 ++++++++--- onedal/covariance/covariance.py | 16 ++++++++++++---- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/onedal/_device_offload.py 
b/onedal/_device_offload.py index 437d2e0d02..ded14acaf2 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -183,7 +183,9 @@ def wrapper_impl(obj, *args, **kwargs): if _get_config()["use_raw_input"] is True: if "queue" not in kwargs: usm_iface = getattr(args[0], "__sycl_usm_array_interface__", None) - data_queue = usm_iface["syclobj"] if usm_iface is not None else data_queue + data_queue = ( + usm_iface["syclobj"] if usm_iface is not None else data_queue + ) kwargs["queue"] = data_queue return _run_on_device(func, obj, *args, **kwargs) diff --git a/onedal/basic_statistics/basic_statistics.py b/onedal/basic_statistics/basic_statistics.py index 841e905e4c..761c489c32 100644 --- a/onedal/basic_statistics/basic_statistics.py +++ b/onedal/basic_statistics/basic_statistics.py @@ -93,7 +93,9 @@ def fit(self, data, sample_weight=None, queue=None): data, sample_weight = _convert_to_supported(policy, data, sample_weight) data_table = to_table(data, sua_iface=_get_sycl_namespace(data)[0]) - weights_table = to_table(sample_weight, sua_iface=_get_sycl_namespace(sample_weight)[0]) + weights_table = to_table( + sample_weight, sua_iface=_get_sycl_namespace(sample_weight)[0] + ) dtype = data.dtype raw_result = self._compute_raw(data_table, weights_table, policy, dtype, is_csr) diff --git a/onedal/cluster/kmeans.py b/onedal/cluster/kmeans.py index a6f5966d1b..fea3bd1055 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -32,12 +32,12 @@ from sklearn.metrics.pairwise import euclidean_distances from sklearn.utils import check_random_state +from .._config import _get_config from ..common._base import BaseEstimator as onedal_BaseEstimator from ..common._mixin import ClusterMixin, TransformerMixin from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array, _is_arraylike_not_scalar, _is_csr from ..utils._array_api import _get_sycl_namespace -from .._config import _get_config class 
_BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC): @@ -271,7 +271,10 @@ def _fit(self, X, module, queue=None): if not use_raw_input: X = _check_array( - X, dtype=[np.float64, np.float32], accept_sparse="csr", force_all_finite=False + X, + dtype=[np.float64, np.float32], + accept_sparse="csr", + force_all_finite=False, ) policy = self._get_policy(queue, X) @@ -281,7 +284,9 @@ def _fit(self, X, module, queue=None): sua_iface = _get_sycl_namespace(X)[0] X_table = to_table(X, sua_iface=sua_iface) - self._check_params_vs_input(X_table, is_csr, policy, dtype=dtype, sua_iface=sua_iface) + self._check_params_vs_input( + X_table, is_csr, policy, dtype=dtype, sua_iface=sua_iface + ) # not used? # params = self._get_onedal_params(is_csr, dtype) diff --git a/onedal/covariance/covariance.py b/onedal/covariance/covariance.py index 7d5b8dfa16..ee358d57aa 100644 --- a/onedal/covariance/covariance.py +++ b/onedal/covariance/covariance.py @@ -19,9 +19,9 @@ from daal4py.sklearn._utils import daal_check_version, get_dtype +from .._config import _get_config from ..common._base import BaseEstimator from ..common.hyperparameters import get_hyperparameters -from .._config import _get_config from ..datatypes import _convert_to_supported, from_table, to_table from ..utils._array_api import _get_sycl_namespace from ..utils.validation import _check_array @@ -126,12 +126,20 @@ def fit(self, X, y=None, queue=None): "covariance", None, "compute", policy, params, X_table ) if daal_check_version((2024, "P", 1)) or (not self.bias): - self.covariance_ = from_table(result.cov_matrix, sua_iface=sua_iface, sycl_queue=queue, xp=xp) + self.covariance_ = from_table( + result.cov_matrix, sua_iface=sua_iface, sycl_queue=queue, xp=xp + ) else: self.covariance_ = ( - from_table(result.cov_matrix, sua_iface=sua_iface, sycl_queue=queue, xp=xp) * (X.shape[0] - 1) / X.shape[0] + from_table( + result.cov_matrix, sua_iface=sua_iface, sycl_queue=queue, xp=xp + ) + * (X.shape[0] - 1) + / 
X.shape[0] ) - self.location_ = xp.reshape(from_table(result.means, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1) + self.location_ = xp.reshape( + from_table(result.means, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1 + ) return self From d7f2c3cc636ddd8e20b5d0c01d1403120c9068fe Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 5 Nov 2024 14:23:48 -0800 Subject: [PATCH 13/56] fix for DBSCAN --- onedal/cluster/dbscan.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index 8fe2fb3e9c..16c5666e38 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -59,7 +59,6 @@ def _get_onedal_params(self, dtype=np.float32): } def _fit(self, X, y, sample_weight, module, queue): - policy = self._get_policy(queue, X) use_raw_input = _get_config().get("use_raw_input", False) is True sua_iface, xp, _ = _get_sycl_namespace(X) @@ -67,6 +66,8 @@ def _fit(self, X, y, sample_weight, module, queue): if use_raw_input and sua_iface is not None: queue = X.sycl_queue + policy = self._get_policy(queue, X) + if not use_raw_input: X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) sample_weight = make2d(sample_weight) if sample_weight is not None else None From 1aca4207caa37c35ddd0f774dd9e1796a67bcb84 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 5 Nov 2024 14:24:57 -0800 Subject: [PATCH 14/56] support for Random Forest --- onedal/ensemble/forest.py | 159 ++++++++++++++++++++-------- sklearnex/ensemble/_forest.py | 191 +++++++++++++++++++--------------- 2 files changed, 218 insertions(+), 132 deletions(-) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index d1d3c9849a..c3e383bf60 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -26,6 +26,7 @@ from daal4py.sklearn._utils import daal_check_version from sklearnex import get_hyperparameters +from .._config import _get_config from ..common._base import BaseEstimator from 
..common._estimator_checks import _check_is_fitted from ..common._mixin import ClassifierMixin, RegressorMixin @@ -37,6 +38,7 @@ _column_or_1d, _validate_targets, ) +from ..utils._array_api import _get_sycl_namespace class BaseForest(BaseEstimator, BaseEnsemble, metaclass=ABCMeta): @@ -289,36 +291,57 @@ def _get_sample_weight(self, sample_weight, X): return sample_weight def _fit(self, X, y, sample_weight, module, queue): - X, y = _check_X_y( - X, - y, - dtype=[np.float64, np.float32], - force_all_finite=True, - accept_sparse="csr", - ) - y = self._validate_targets(y, X.dtype) + use_raw_input = _get_config().get("use_raw_input", False) is True + sua_iface, xp, _ = _get_sycl_namespace(X) + + # All data should use the same sycl queue + if use_raw_input and sua_iface is not None: + queue = X.sycl_queue + + if not use_raw_input: + X, y = _check_X_y( + X, + y, + dtype=[np.float64, np.float32], + force_all_finite=True, + accept_sparse="csr", + ) + y = self._validate_targets(y, X.dtype) self.n_features_in_ = X.shape[1] if sample_weight is not None and len(sample_weight) > 0: - sample_weight = self._get_sample_weight(sample_weight, X) + if not use_raw_input: + sample_weight = self._get_sample_weight(sample_weight, X) data = (X, y, sample_weight) else: data = (X, y) policy = self._get_policy(queue, *data) data = _convert_to_supported(policy, *data) params = self._get_onedal_params(data[0]) - train_result = module.train(policy, params, *to_table(*data)) + # TODO: + # check for None sample_weight. 
+ train_result = module.train(policy, params, *to_table(*data, sua_iface=sua_iface)) self._onedal_model = train_result.model if self.oob_score: if isinstance(self, ClassifierMixin): - self.oob_score_ = from_table(train_result.oob_err_accuracy).item() + # self.oob_score_ = from_table(train_result.oob_err_accuracy).item() + self.oob_score_ = from_table( + train_result.oob_err_accuracy, + sua_iface=sua_iface, + sycl_queue=queue, + xp=xp, + )[0] + self.oob_decision_function_ = from_table( - train_result.oob_err_decision_function - ) - if np.any(self.oob_decision_function_ == 0): + train_result.oob_err_decision_function, + sua_iface=sua_iface, + sycl_queue=queue, + xp=xp, + )[0] + if xp.any(self.oob_decision_function_ == 0): warnings.warn( "Some inputs do not have OOB scores. This probably means " "too few trees were used to compute any reliable OOB " @@ -326,11 +349,21 @@ def _fit(self, X, y, sample_weight, module, queue): UserWarning, ) else: - self.oob_score_ = from_table(train_result.oob_err_r2).item() - self.oob_prediction_ = from_table( - train_result.oob_err_prediction - ).reshape(-1) - if np.any(self.oob_prediction_ == 0): + # self.oob_score_ = from_table(train_result.oob_err_r2).item() + self.oob_score_ = from_table( + train_result.oob_err_r2, sua_iface=sua_iface, sycl_queue=queue, xp=xp + )[0] + # self.oob_prediction_ = from_table(train_result.oob_err_prediction).reshape(-1) + self.oob_score_ = xp.reshape( + from_table( + train_result.oob_err_r2, + sua_iface=sua_iface, + sycl_queue=queue, + xp=xp, + ), + -1, + ) + if xp.any(self.oob_prediction_ == 0): warnings.warn( "Some inputs do not have OOB scores. 
This probably means " "too few trees were used to compute any reliable OOB " @@ -347,37 +380,61 @@ def _create_model(self, module): def _predict(self, X, module, queue, hparams=None): _check_is_fitted(self) - X = _check_array( - X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse=False - ) - _check_n_features(self, X, False) + + use_raw_input = _get_config().get("use_raw_input", False) is True + sua_iface, xp, _ = _get_sycl_namespace(X) + + # All data should use the same sycl queue + if use_raw_input and sua_iface is not None: + queue = X.sycl_queue + if not use_raw_input: + X = _check_array( + X, + dtype=[np.float64, np.float32], + force_all_finite=True, + accept_sparse=False, + ) + _check_n_features(self, X, False) policy = self._get_policy(queue, X) model = self._onedal_model X = _convert_to_supported(policy, X) params = self._get_onedal_params(X) if hparams is not None and not hparams.is_default: - result = module.infer(policy, params, hparams.backend, model, to_table(X)) + result = module.infer( + policy, params, hparams.backend, model, to_table(X, sua_iface=sua_iface) + ) else: - result = module.infer(policy, params, model, to_table(X)) + result = module.infer(policy, params, model, to_table(X, sua_iface=sua_iface)) - y = from_table(result.responses) + y = from_table(result.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp) return y def _predict_proba(self, X, module, queue): _check_is_fitted(self) - X = _check_array( - X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse=False - ) - _check_n_features(self, X, False) + use_raw_input = _get_config().get("use_raw_input", False) is True + sua_iface, xp, _ = _get_sycl_namespace(X) + + # All data should use the same sycl queue + if use_raw_input and sua_iface is not None: + queue = X.sycl_queue + + if not use_raw_input: + X = _check_array( + X, + dtype=[np.float64, np.float32], + force_all_finite=True, + accept_sparse=False, + ) + _check_n_features(self, X, False) 
policy = self._get_policy(queue, X) X = _convert_to_supported(policy, X) params = self._get_onedal_params(X) params["infer_mode"] = "class_probabilities" model = self._onedal_model - result = module.infer(policy, params, model, to_table(X)) - y = from_table(result.probabilities) + result = module.infer(policy, params, model, to_table(X, sua_iface=sua_iface)) + y = from_table(result.probabilities, sua_iface=sua_iface, sycl_queue=queue, xp=xp) return y @@ -461,15 +518,19 @@ def fit(self, X, y, sample_weight=None, queue=None): ) def predict(self, X, queue=None): + _, xp, _ = _get_sycl_namespace(X) hparams = get_hyperparameters("decision_forest", "infer") - pred = super()._predict( - X, - self._get_backend("decision_forest", "classification", None), - queue, - hparams, + pred = xp.reshape( + super()._predict( + X, + self._get_backend("decision_forest", "classification", None), + queue, + hparams, + ), + -1, ) - return np.take(self.classes_, pred.ravel().astype(np.int64, casting="unsafe")) + return xp.take(self.classes_, pred.astype(xp.int64, casting="unsafe")) def predict_proba(self, X, queue=None): return super()._predict_proba( @@ -536,10 +597,14 @@ def __init__( ) def fit(self, X, y, sample_weight=None, queue=None): - if sample_weight is not None: - if hasattr(sample_weight, "__array__"): - sample_weight[sample_weight == 0.0] = 1.0 - sample_weight = [sample_weight] + use_raw_input = get_config().get("use_raw_input", False) is True + # TODO: + # check if required. 
+ if not use_raw_input: + if sample_weight is not None: + if hasattr(sample_weight, "__array__"): + sample_weight[sample_weight == 0.0] = 1.0 + sample_weight = [sample_weight] return super()._fit( X, y, @@ -549,10 +614,12 @@ def fit(self, X, y, sample_weight=None, queue=None): ) def predict(self, X, queue=None): - return ( - super() - ._predict(X, self._get_backend("decision_forest", "regression", None), queue) - .ravel() + _, xp, _ = _get_sycl_namespace(X) + return xp.reshape( + super()._predict( + X, self._get_backend("decision_forest", "regression", None), queue + ), + -1, ) diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 2a04962645..674f0bc889 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -62,6 +62,7 @@ from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain +from ..config import get_config from ..utils._array_api import get_namespace if sklearn_check_version("1.2"): @@ -79,19 +80,21 @@ class BaseForest(ABC): _onedal_factory = None def _onedal_fit(self, X, y, sample_weight=None, queue=None): - X, y = validate_data( - self, - X, - y, - multi_output=True, - accept_sparse=False, - dtype=[np.float64, np.float32], - force_all_finite=False, - ensure_2d=True, - ) + use_raw_input = get_config().get("use_raw_input", False) is True + if not use_raw_input: + X, y = validate_data( + self, + X, + y, + multi_output=True, + accept_sparse=False, + dtype=[np.float64, np.float32], + force_all_finite=False, + ensure_2d=True, + ) - if sample_weight is not None: - sample_weight = _check_sample_weight(sample_weight, X) + if sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, X) if y.ndim == 2 and y.shape[1] == 1: warnings.warn( @@ -102,20 +105,22 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): stacklevel=2, ) - if y.ndim == 1: - # reshape is necessary to preserve the data contiguity against vs - # [:, np.newaxis] that does 
not. - y = np.reshape(y, (-1, 1)) + if not use_raw_input: + if y.ndim == 1: + # reshape is necessary to preserve the data contiguity against vs + # [:, np.newaxis] that does not. + y = np.reshape(y, (-1, 1)) self._n_samples, self.n_outputs_ = y.shape - y, expanded_class_weight = self._validate_y_class_weight(y) + if not use_raw_input: + y, expanded_class_weight = self._validate_y_class_weight(y) - if expanded_class_weight is not None: - if sample_weight is not None: - sample_weight = sample_weight * expanded_class_weight - else: - sample_weight = expanded_class_weight + if expanded_class_weight is not None: + if sample_weight is not None: + sample_weight = sample_weight * expanded_class_weight + else: + sample_weight = expanded_class_weight if sample_weight is not None: sample_weight = [sample_weight] @@ -155,7 +160,10 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): # Compute self._onedal_estimator = self._onedal_factory(**onedal_params) - self._onedal_estimator.fit(X, np.ravel(y), sample_weight, queue=queue) + if use_raw_input: + self._onedal_estimator.fit(X, y, sample_weight, queue=queue) + else: + self._onedal_estimator.fit(X, np.ravel(y), sample_weight, queue=queue) self._save_attributes() @@ -371,6 +379,7 @@ def _estimators_(self): "nodes": check_tree_nodes(tree_i_state_class.node_ar), "values": tree_i_state_class.value_ar, } + # Note: only on host. 
est_i.tree_ = Tree( self.n_features_in_, np.array([n_classes_], dtype=np.intp), @@ -790,58 +799,64 @@ def _onedal_gpu_supported(self, method_name, *data): return patching_status def _onedal_predict(self, X, queue=None): - - if sklearn_check_version("1.0"): - X = validate_data( - self, - X, - dtype=[np.float64, np.float32], - force_all_finite=False, - reset=False, - ensure_2d=True, - ) - else: - X = check_array( - X, - dtype=[np.float64, np.float32], - force_all_finite=False, - ) # Warning, order of dtype matters - if hasattr(self, "n_features_in_"): - try: - num_features = _num_features(X) - except TypeError: - num_features = _num_samples(X) - if num_features != self.n_features_in_: - raise ValueError( - ( - f"X has {num_features} features, " - f"but {self.__class__.__name__} is expecting " - f"{self.n_features_in_} features as input" - ) - ) - self._check_n_features(X, reset=False) - - res = self._onedal_estimator.predict(X, queue=queue) - return np.take(self.classes_, res.ravel().astype(np.int64, casting="unsafe")) + xp, _ = get_namespace(X) + use_raw_input = get_config().get("use_raw_input", False) is True + if not use_raw_input: + if sklearn_check_version("1.0"): + X = validate_data( + self, + X, + dtype=[np.float64, np.float32], + force_all_finite=False, + reset=False, + ensure_2d=True, + ) + # sklearn version < 1.0 is not supported + # else: + # X = check_array( + # X, + # dtype=[np.float64, np.float32], + # force_all_finite=False, + # ) # Warning, order of dtype matters + # if hasattr(self, "n_features_in_"): + # try: + # num_features = _num_features(X) + # except TypeError: + # num_features = _num_samples(X) + # if num_features != self.n_features_in_: + # raise ValueError( + # ( + # f"X has {num_features} features, " + # f"but {self.__class__.__name__} is expecting " + # f"{self.n_features_in_} features as input" + # ) + # ) + # self._check_n_features(X, reset=False) + + res = xp.reshape(self._onedal_estimator.predict(X, queue=queue), -1) + return 
xp.take(self.classes_, res.astype(xp.int64, casting="unsafe")) def _onedal_predict_proba(self, X, queue=None): - - if sklearn_check_version("1.0"): - X = validate_data( - self, - X, - dtype=[np.float64, np.float32], - force_all_finite=False, - reset=False, - ensure_2d=True, - ) - else: - X = check_array( - X, - dtype=[np.float64, np.float32], - force_all_finite=False, - ) # Warning, order of dtype matters - self._check_n_features(X, reset=False) + xp, _ = get_namespace(X) + use_raw_input = get_config().get("use_raw_input", False) is True + if not use_raw_input: + if sklearn_check_version("1.0"): + X = validate_data( + self, + X, + dtype=[np.float64, np.float32], + force_all_finite=False, + reset=False, + ensure_2d=True, + ) + # sklearn version < 1.0 is not supported + # else: + # X = check_array( + # X, + # dtype=[np.float64, np.float32], + # force_all_finite=False, + # ) # Warning, order of dtype matters + # self._check_n_features(X, reset=False) return self._onedal_estimator.predict_proba(X, queue=queue) @@ -1131,23 +1146,27 @@ def _onedal_gpu_supported(self, method_name, *data): def _onedal_predict(self, X, queue=None): check_is_fitted(self, "_onedal_estimator") - if sklearn_check_version("1.0"): - X = validate_data( - self, - X, - dtype=[np.float64, np.float32], - force_all_finite=False, - reset=False, - ensure_2d=True, - ) # Warning, order of dtype matters - else: - X = check_array( - X, dtype=[np.float64, np.float32], force_all_finite=False - ) # Warning, order of dtype matters + if not use_raw_input: + if sklearn_check_version("1.0"): + X = validate_data( + self, + X, + dtype=[np.float64, np.float32], + force_all_finite=False, + reset=False, + ensure_2d=True, + ) # Warning, order of dtype matters + # sklearn version < 1.0 is not supported + # else: + # X = check_array( + # X, dtype=[np.float64, np.float32], force_all_finite=False + # ) # Warning, order of dtype matters return self._onedal_estimator.predict(X, queue=queue) def _onedal_score(self, X, y, 
sample_weight=None, queue=None): + # TODO: + # should be checked for dpctl/dpnp inputs. return r2_score( y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight ) From 362930a2e3bd7fcb52016f46820f42b4f9196fe5 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 5 Nov 2024 15:02:30 -0800 Subject: [PATCH 15/56] PCA support (batch) --- onedal/decomposition/pca.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/onedal/decomposition/pca.py b/onedal/decomposition/pca.py index 7d9e38deab..51e2a9abe0 100644 --- a/onedal/decomposition/pca.py +++ b/onedal/decomposition/pca.py @@ -21,8 +21,10 @@ from sklearn.decomposition._pca import _infer_dimension from sklearn.utils.extmath import stable_cumsum +from .._config import _get_config from ..common._base import BaseEstimator from ..datatypes import _convert_to_supported, from_table, to_table +from ..utils._array_api import _get_sycl_namespace class BasePCA(BaseEstimator, metaclass=ABCMeta): @@ -128,20 +130,34 @@ def _create_model(self): return m def predict(self, X, queue=None): + sua_iface, xp, _ = _get_sycl_namespace(X) + if xp is None: + xp = np + use_raw_input = _get_config().get("use_raw_input") is True + if use_raw_input and sua_iface: + queue = X.sycl_queue + policy = self._get_policy(queue, X) model = self._create_model() X = _convert_to_supported(policy, X) params = self._get_onedal_params(X, stage="predict") + X_table = to_table(X, sua_iface=sua_iface) + result = self._get_backend( - "decomposition", "dim_reduction", "infer", policy, params, model, to_table(X) + "decomposition", "dim_reduction", "infer", policy, params, model, X_table ) - return from_table(result.transformed_data) + return from_table(result.transformed_data, sua_iface=sua_iface, sycl_queue=queue, xp=xp) class PCA(BasePCA): def fit(self, X, y=None, queue=None): + use_raw_input = _get_config()["use_raw_input"] + sua_iface, xp, _ = _get_sycl_namespace(X) + if use_raw_input and sua_iface: + 
queue = X.sycl_queue + n_samples, n_features = X.shape n_sf_min = min(n_samples, n_features) self._validate_n_components(self.n_components, n_samples, n_features) @@ -152,14 +168,16 @@ def fit(self, X, y=None, queue=None): if isinstance(X, np.ndarray) and not X.flags["OWNDATA"]: X = X.copy() X = _convert_to_supported(policy, X) + X_table = to_table(X, sua_iface=sua_iface) params = self._get_onedal_params(X) result = self._get_backend( - "decomposition", "dim_reduction", "train", policy, params, to_table(X) + "decomposition", "dim_reduction", "train", policy, params, X_table ) - self.mean_ = from_table(result.means).ravel() - self.variances_ = from_table(result.variances) + self.mean_ = xp.reshape(from_table(result.means, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1) + self.variances_ = from_table(result.variances, sua_iface=sua_iface, sycl_queue=queue, xp=xp) + # TODO: why are there errors when using sua_iface and sycl_queue on following from_table calls? self.components_ = from_table(result.eigenvectors) self.singular_values_ = from_table(result.singular_values).ravel() self.explained_variance_ = np.maximum(from_table(result.eigenvalues).ravel(), 0) From 102dcaeb49b4d3642eee80e11315f0b98157bc33 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 5 Nov 2024 15:05:47 -0800 Subject: [PATCH 16/56] minor fix for dbscan and rf --- sklearnex/cluster/dbscan.py | 2 +- sklearnex/ensemble/_forest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index 2f57fbbd38..d4d67ee94b 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -25,9 +25,9 @@ from daal4py.sklearn._utils import sklearn_check_version from onedal.cluster import DBSCAN as onedal_DBSCAN +from .._config import get_config from .._device_offload import dispatch from .._utils import PatchingConditionsChain -from ..config import get_config if sklearn_check_version("1.1") and not sklearn_check_version("1.2"): 
from sklearn.utils import check_scalar diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 674f0bc889..5d4ad6516f 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -60,9 +60,9 @@ from sklearnex import get_hyperparameters from sklearnex._utils import register_hyperparameters +from .._config import get_config from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain -from ..config import get_config from ..utils._array_api import get_namespace if sklearn_check_version("1.2"): From 6edab5b394fd6334b4a011e1afbcef997f5b220b Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 6 Nov 2024 03:10:00 -0800 Subject: [PATCH 17/56] fully fixed DBSCAN --- onedal/cluster/dbscan.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index 16c5666e38..87059dcaae 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -23,7 +23,7 @@ from ..common._mixin import ClusterMixin from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array -from ..utils._array_api import _get_sycl_namespace +from ..utils._array_api import _asarray, _get_sycl_namespace class BaseDBSCAN(BaseEstimator, ClusterMixin): @@ -90,18 +90,28 @@ def _fit(self, X, y, sample_weight, module, queue): self.labels_ = xp.reshape( from_table(result.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1 ) - if result.core_observation_indices is not None: + if ( + result.core_observation_indices is not None + and not result.core_observation_indices.kind == "empty" + ): self.core_sample_indices_ = xp.reshape( from_table( result.core_observation_indices, - sua_iface=sua_iface, sycl_queue=queue, + sua_iface=sua_iface, xp=xp, ), -1, ) else: - self.core_sample_indices_ = xp.array([], dtype=xp.int32) + # TODO: + # self.core_sample_indices_ = _asarray([], xp, sycl_queue=queue, 
dtype=xp.int32) + if sua_iface: + self.core_sample_indices_ = xp.asarray( + [], sycl_queue=queue, dtype=xp.int32 + ) + else: + self.core_sample_indices_ = xp.asarray([], dtype=xp.int32) self.components_ = xp.take(X, self.core_sample_indices_, axis=0) self.n_features_in_ = X.shape[1] return self From e153a286590e147cf16d9ab75b209376daa8f005 Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Wed, 6 Nov 2024 06:27:24 -0800 Subject: [PATCH 18/56] Add Incremental Linear Regression --- .../linear_model/incremental_linear_model.py | 68 ++++++++++++++----- onedal/linear_model/linear_model.py | 4 ++ onedal/linear_model/logistic_regression.py | 5 +- 3 files changed, 59 insertions(+), 18 deletions(-) diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index f0558ad973..149b527ddd 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -18,9 +18,11 @@ from daal4py.sklearn._utils import get_dtype +from .._config import _get_config from ..common.hyperparameters import get_hyperparameters from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_X_y, _num_features +from ..utils._array_api import _get_sycl_namespace from .linear_model import BaseLinearRegression @@ -74,6 +76,13 @@ def partial_fit(self, X, y, queue=None): """ module = self._get_backend("linear_model", "regression") + self._sua_iface, self._xp, _ = _get_sycl_namespace(X, y) + if self._xp is None: + self._xp = np + use_raw_input = _get_config().get("use_raw_input") is True + if use_raw_input and self._sua_iface is not None: + queue = X.sycl_queue + self._queue = queue policy = self._get_policy(queue, X) @@ -83,14 +92,19 @@ def partial_fit(self, X, y, queue=None): self._dtype = get_dtype(X) self._params = self._get_onedal_params(self._dtype) - y = np.asarray(y, dtype=self._dtype) + if not use_raw_input: + y = np.asarray(y, dtype=self._dtype) - X, y = 
_check_X_y( - X, y, dtype=[np.float64, np.float32], accept_2d_y=True, force_all_finite=False - ) + X, y = _check_X_y( + X, + y, + dtype=[np.float64, np.float32], + accept_2d_y=True, + force_all_finite=False, + ) self.n_features_in_ = _num_features(X, fallback_1d=True) - X_table, y_table = to_table(X, y) + X_table, y_table = to_table(X, y, sua_iface=self._sua_iface) hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: self._partial_result = module.partial_train( @@ -138,10 +152,15 @@ def finalize_fit(self, queue=None): self._onedal_model = result.model - packed_coefficients = from_table(result.model.packed_coefficients) + packed_coefficients = from_table( + result.model.packed_coefficients, + sua_iface=self._sua_iface, + sycl_queue=self._queue, + xp=self._xp, + ) self.coef_, self.intercept_ = ( - packed_coefficients[:, 1:].squeeze(), - packed_coefficients[:, 0].squeeze(), + self._xp.squeeze(packed_coefficients[:, 1:]), + self._xp.squeeze(packed_coefficients[:, 0]), ) return self @@ -204,6 +223,13 @@ def partial_fit(self, X, y, queue=None): """ module = self._get_backend("linear_model", "regression") + self._sua_iface, self._xp, _ = _get_sycl_namespace(X) + if self._xp is None: + self._xp = np + use_raw_input = _get_config().get("use_raw_input") is True + if use_raw_input and self._sua_iface is not None: + queue = X.sycl_queue + self._queue = queue policy = self._get_policy(queue, X) @@ -213,14 +239,19 @@ def partial_fit(self, X, y, queue=None): self._dtype = get_dtype(X) self._params = self._get_onedal_params(self._dtype) - y = np.asarray(y, dtype=self._dtype) + if not use_raw_input: + y = np.asarray(y, dtype=self._dtype) - X, y = _check_X_y( - X, y, dtype=[np.float64, np.float32], accept_2d_y=True, force_all_finite=False - ) + X, y = _check_X_y( + X, + y, + dtype=[np.float64, np.float32], + accept_2d_y=True, + force_all_finite=False, + ) self.n_features_in_ = _num_features(X, fallback_1d=True) - X_table, 
y_table = to_table(X, y) + X_table, y_table = to_table(X, y, sua_iface=self._sua_iface) self._partial_result = module.partial_train( policy, self._params, self._partial_result, X_table, y_table ) @@ -249,10 +280,15 @@ def finalize_fit(self, queue=None): self._onedal_model = result.model - packed_coefficients = from_table(result.model.packed_coefficients) + packed_coefficients = from_table( + result.model.packed_coefficients, + sua_iface=self._sua_iface, + sycl_queue=self._queue, + xp=self._xp, + ) self.coef_, self.intercept_ = ( - packed_coefficients[:, 1:].squeeze(), - packed_coefficients[:, 0].squeeze(), + self._xp.squeeze(packed_coefficients[:, 1:]), + self._xp.squeeze(packed_coefficients[:, 0]), ) return self diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index 196292a03f..b693d52798 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -207,6 +207,8 @@ def fit(self, X, y, queue=None): if xp is None: xp = np use_raw_input = _get_config().get("use_raw_input") is True + if use_raw_input and sua_iface is not None: + queue = X.sycl_queue if not use_raw_input: # TODO Fix _check_X_y to make sure this conversion is there @@ -314,6 +316,8 @@ def fit(self, X, y, queue=None): if xp is None: xp = np use_raw_input = _get_config().get("use_raw_input") is True + if use_raw_input and sua_iface is not None: + queue = X.sycl_queue if not use_raw_input: X = _check_array( diff --git a/onedal/linear_model/logistic_regression.py b/onedal/linear_model/logistic_regression.py index 730cfd8cb5..26068d4e05 100644 --- a/onedal/linear_model/logistic_regression.py +++ b/onedal/linear_model/logistic_regression.py @@ -67,7 +67,8 @@ def _get_onedal_params(self, is_csr, dtype=np.float32): def _fit(self, X, y, module, queue): use_raw_input = _get_config().get("use_raw_input") is True - if use_raw_input and _get_sycl_namespace(X)[0] is not None: + sua_iface = _get_sycl_namespace(X, y)[0] + if use_raw_input and 
sua_iface is not None: queue = X.sycl_queue sparsity_enabled = daal_check_version((2024, "P", 700)) @@ -96,7 +97,7 @@ def _fit(self, X, y, module, queue): policy = self._get_policy(queue, X, y) X, y = _convert_to_supported(policy, X, y) params = self._get_onedal_params(is_csr, get_dtype(X)) - sua_iface = _get_sycl_namespace(X, y)[0] + X_table, y_table = to_table(X, y, sua_iface=sua_iface) result = module.train(policy, params, X_table, y_table) From 37d32c9ffb88f22fd0c816e4f189452be7e9fa10 Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Wed, 6 Nov 2024 06:28:23 -0800 Subject: [PATCH 19/56] Linting --- onedal/decomposition/pca.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/onedal/decomposition/pca.py b/onedal/decomposition/pca.py index 51e2a9abe0..c6ea9ea01d 100644 --- a/onedal/decomposition/pca.py +++ b/onedal/decomposition/pca.py @@ -147,7 +147,9 @@ def predict(self, X, queue=None): result = self._get_backend( "decomposition", "dim_reduction", "infer", policy, params, model, X_table ) - return from_table(result.transformed_data, sua_iface=sua_iface, sycl_queue=queue, xp=xp) + return from_table( + result.transformed_data, sua_iface=sua_iface, sycl_queue=queue, xp=xp + ) class PCA(BasePCA): @@ -175,8 +177,12 @@ def fit(self, X, y=None, queue=None): "decomposition", "dim_reduction", "train", policy, params, X_table ) - self.mean_ = xp.reshape(from_table(result.means, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1) - self.variances_ = from_table(result.variances, sua_iface=sua_iface, sycl_queue=queue, xp=xp) + self.mean_ = xp.reshape( + from_table(result.means, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1 + ) + self.variances_ = from_table( + result.variances, sua_iface=sua_iface, sycl_queue=queue, xp=xp + ) # TODO: why are there errors when using sua_iface and sycl_queue on following from_table calls? 
self.components_ = from_table(result.eigenvectors) self.singular_values_ = from_table(result.singular_values).ravel() From 71c513562ce39e2c32b58a370831919bbac5877c Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 6 Nov 2024 07:11:30 -0800 Subject: [PATCH 20/56] add modification to knn --- onedal/_device_offload.py | 2 +- onedal/neighbors/neighbors.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index ded14acaf2..5558d319f7 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -184,7 +184,7 @@ def wrapper_impl(obj, *args, **kwargs): if "queue" not in kwargs: usm_iface = getattr(args[0], "__sycl_usm_array_interface__", None) data_queue = ( - usm_iface["syclobj"] if usm_iface is not None else data_queue + usm_iface["syclobj"] if usm_iface is not None else None ) kwargs["queue"] = data_queue return _run_on_device(func, obj, *args, **kwargs) diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 2fc115eac5..4d4f58a0db 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -19,6 +19,9 @@ import numpy as np +from .._config import _get_config +from ..utils._array_api import _get_sycl_namespace + from daal4py import ( bf_knn_classification_model, bf_knn_classification_prediction, @@ -452,8 +455,9 @@ def _onedal_predict(self, model, X, params, queue): if "responses" not in params["result_option"]: params["result_option"] += "|responses" params["fptype"] = "float" if X.dtype == np.float32 else "double" + X_table = to_table(X, sua_iface=_get_sycl_namespace(X)[0]) result = self._get_backend( - "neighbors", "classification", "infer", policy, params, model, to_table(X) + "neighbors", "classification", "infer", policy, params, model, X_table ) return result @@ -462,7 +466,8 @@ def fit(self, X, y, queue=None): return super()._fit(X, y, queue=queue) def predict(self, X, queue=None): - X = _check_array(X, 
accept_sparse="csr", dtype=[np.float64, np.float32]) + if not _get_config()["use_raw_input"]: + X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) onedal_model = getattr(self, "_onedal_model", None) n_features = getattr(self, "n_features_in_", None) n_samples_fit_ = getattr(self, "n_samples_fit_", None) From db9f0216a0c32382f3f12174a0a2cf0702a60cae Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 6 Nov 2024 09:08:54 -0800 Subject: [PATCH 21/56] minor update for RF --- onedal/ensemble/forest.py | 4 ++++ sklearnex/ensemble/_forest.py | 12 +++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index c3e383bf60..a715249f4c 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -307,6 +307,10 @@ def _fit(self, X, y, sample_weight, module, queue): accept_sparse="csr", ) y = self._validate_targets(y, X.dtype) + else: + # TODO: + # check it first. + self.classes_ = None self.n_features_in_ = X.shape[1] diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 5d4ad6516f..71a10025b7 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -81,6 +81,7 @@ class BaseForest(ABC): def _onedal_fit(self, X, y, sample_weight=None, queue=None): use_raw_input = get_config().get("use_raw_input", False) is True + xp, _ = get_namespace(X) if not use_raw_input: X, y = validate_data( self, @@ -105,11 +106,11 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): stacklevel=2, ) - if not use_raw_input: - if y.ndim == 1: - # reshape is necessary to preserve the data contiguity against vs - # [:, np.newaxis] that does not. - y = np.reshape(y, (-1, 1)) + # if not use_raw_input: + if y.ndim == 1: + # reshape is necessary to preserve the data contiguity against vs + # [:, np.newaxis] that does not. 
+ y = xp.reshape(y, (-1, 1)) self._n_samples, self.n_outputs_ = y.shape @@ -123,6 +124,7 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): sample_weight = expanded_class_weight if sample_weight is not None: sample_weight = [sample_weight] + self.n_features_in_ = X.shape[1] onedal_params = { "n_estimators": self.n_estimators, From bc353da28d2a2367f3f352616fd354b72f781347 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 7 Nov 2024 03:28:09 -0800 Subject: [PATCH 22/56] fix for RandomForestClassifier --- onedal/ensemble/forest.py | 26 +++++++++++++++++++++----- sklearnex/ensemble/_forest.py | 26 ++++++++++++++++---------- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index a715249f4c..38457ceda0 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -310,7 +310,7 @@ def _fit(self, X, y, sample_weight, module, queue): else: # TODO: # check it first. - self.classes_ = None + self.classes_ = xp.unique_all(y).values self.n_features_in_ = X.shape[1] @@ -320,12 +320,26 @@ def _fit(self, X, y, sample_weight, module, queue): data = (X, y, sample_weight) else: data = (X, y) + policy = self._get_policy(queue, *data) data = _convert_to_supported(policy, *data) params = self._get_onedal_params(data[0]) - # TODO: - # check for None sample_weight. 
- train_result = module.train(policy, params, *to_table(*data, sua_iface=sua_iface)) + + if sample_weight is not None and len(sample_weight) > 0: + train_result = train_result = module.train( + policy, + params, + to_table(X, sua_iface=sua_iface), + to_table(y, sua_iface=sua_iface), + to_table(sample_weight, _get_sycl_namespace(sample_weight)[0]), + ) + else: + train_result = train_result = module.train( + policy, + params, + to_table(X, sua_iface=sua_iface), + to_table(y, sua_iface=sua_iface), + ) self._onedal_model = train_result.model @@ -534,7 +548,9 @@ def predict(self, X, queue=None): -1, ) - return xp.take(self.classes_, pred.astype(xp.int64, casting="unsafe")) + # return xp.take(self.classes_, pred.astype(xp.int64, casting="unsafe")) + pred = xp.astype(pred, xp.int64) + return xp.take(self.classes_, pred) def predict_proba(self, X, queue=None): return super()._predict_proba( diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 71a10025b7..104ef65e8a 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -96,6 +96,9 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) + else: + self.classes_ = xp.unique_all(y).values + self.n_classes_ = len(self.classes_) if y.ndim == 2 and y.shape[1] == 1: warnings.warn( @@ -106,13 +109,16 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): stacklevel=2, ) - # if not use_raw_input: - if y.ndim == 1: - # reshape is necessary to preserve the data contiguity against vs - # [:, np.newaxis] that does not. - y = xp.reshape(y, (-1, 1)) + if not use_raw_input: + if y.ndim == 1: + # reshape is necessary to preserve the data contiguity against vs + # [:, np.newaxis] that does not. 
+ y = xp.reshape(y, (-1, 1)) - self._n_samples, self.n_outputs_ = y.shape + if y.ndim == 1: + self._n_samples, self.n_outputs_ = y.shape[0], 1 + else: + self._n_samples, self.n_outputs_ = y.shape if not use_raw_input: y, expanded_class_weight = self._validate_y_class_weight(y) @@ -122,8 +128,8 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): sample_weight = sample_weight * expanded_class_weight else: sample_weight = expanded_class_weight - if sample_weight is not None: - sample_weight = [sample_weight] + if sample_weight is not None: + sample_weight = [sample_weight] self.n_features_in_ = X.shape[1] onedal_params = { @@ -834,9 +840,9 @@ def _onedal_predict(self, X, queue=None): # ) # ) # self._check_n_features(X, reset=False) - res = xp.reshape(self._onedal_estimator.predict(X, queue=queue), -1) - return xp.take(self.classes_, res.astype(xp.int64, casting="unsafe")) + res = xp.astype(res, xp.int64) + return xp.take(self.classes_, res) def _onedal_predict_proba(self, X, queue=None): xp, _ = get_namespace(X) From e8732050304196f6d61865d93561aab062969d33 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 7 Nov 2024 03:31:28 -0800 Subject: [PATCH 23/56] minor for RF --- sklearnex/ensemble/_forest.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 104ef65e8a..7b65435788 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -17,6 +17,7 @@ import numbers import warnings from abc import ABC +from collections.abc import Iterable import numpy as np from scipy import sparse as sp @@ -177,7 +178,11 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): # Decapsulate classes_ attributes if hasattr(self, "classes_") and self.n_outputs_ == 1: - self.n_classes_ = self.n_classes_[0] + self.n_classes_ = ( + self.n_classes_[0] + if isinstance(self.n_classes_, Iterable) + else self.n_classes_ + ) self.classes_ = self.classes_[0] return 
self From fe3222a8add3f46d8cf0d2302326837ea90e175b Mon Sep 17 00:00:00 2001 From: "Kruglov, Oleg" Date: Thu, 7 Nov 2024 04:06:22 -0800 Subject: [PATCH 24/56] Update online algos --- .../incremental_basic_statistics.py | 4 +- onedal/covariance/incremental_covariance.py | 29 +++++++-- onedal/decomposition/incremental_pca.py | 63 ++++++++++++++++--- .../tests/test_incremental_covariance.py | 23 ++++--- .../tests/test_incremental_pca.py | 22 ++++--- 5 files changed, 106 insertions(+), 35 deletions(-) diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index ef4f89bd19..9b39061018 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -95,7 +95,7 @@ def partial_fit(self, X, weights=None, queue=None): self : object Returns the instance itself. """ - use_raw_input = _get_config().get("use_raw_input", False) is True + use_raw_input = _get_config().get("use_raw_input", False) sua_iface, xp, _ = _get_sycl_namespace(X) # Saving input array namespace and sua_iface, that will be used in # finalize_fit. 
@@ -103,7 +103,7 @@ def partial_fit(self, X, weights=None, queue=None): self._input_xp = xp # All data should use the same sycl queue - if use_raw_input and sua_iface is not None: + if use_raw_input and sua_iface: queue = X.sycl_queue self._queue = queue diff --git a/onedal/covariance/incremental_covariance.py b/onedal/covariance/incremental_covariance.py index baa6d48163..be5a541c2d 100644 --- a/onedal/covariance/incremental_covariance.py +++ b/onedal/covariance/incremental_covariance.py @@ -17,8 +17,10 @@ from daal4py.sklearn._utils import daal_check_version, get_dtype +from .._config import _get_config from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array +from ..utils._array_api import _get_sycl_namespace from .covariance import BaseEmpiricalCovariance @@ -83,7 +85,19 @@ def partial_fit(self, X, y=None, queue=None): self : object Returns the instance itself. """ - X = _check_array(X, dtype=[np.float64, np.float32], ensure_2d=True) + use_raw_input = _get_config().get("use_raw_input", False) + sua_iface, xp, _ = _get_sycl_namespace(X) + # Saving input array namespace and sua_iface, that will be used in + # finalize_fit. + self._input_sua_iface = sua_iface + self._input_xp = xp + + # All data should use the same sycl queue + if use_raw_input and sua_iface: + queue = X.sycl_queue + + if not use_raw_input or True: + X = _check_array(X, dtype=[np.float64, np.float32], ensure_2d=True) self._queue = queue @@ -122,10 +136,8 @@ def finalize_fit(self, queue=None): Returns the instance itself. 
""" params = self._get_onedal_params(self._dtype) - if queue is not None: - policy = self._get_policy(queue) - else: - policy = self._get_policy(self._queue) + queue = queue if queue is not None else self._queue + policy = self._get_policy(queue) result = self._get_backend( "covariance", @@ -136,7 +148,12 @@ def finalize_fit(self, queue=None): self._partial_result, ) if daal_check_version((2024, "P", 1)) or (not self.bias): - self.covariance_ = from_table(result.cov_matrix) + self.covariance_ = from_table( + result.cov_matrix, + sua_iface=self._input_sua_iface, + sycl_queue=queue, + xp=self._input_xp, + ) else: n_rows = self._partial_result.partial_n_rows self.covariance_ = from_table(result.cov_matrix) * (n_rows - 1) / n_rows diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index 7199c1e1c2..eb37707f56 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -18,8 +18,10 @@ from daal4py.sklearn._utils import get_dtype +from .._config import _get_config from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array +from ..utils._array_api import _get_sycl_namespace from .pca import BasePCA @@ -122,7 +124,21 @@ def partial_fit(self, X, queue): self : object Returns the instance itself. """ - X = _check_array(X) + + use_raw_input = _get_config().get("use_raw_input", False) + sua_iface, xp, _ = _get_sycl_namespace(X) + # Saving input array namespace and sua_iface, that will be used in + # finalize_fit. 
+ self._input_sua_iface = sua_iface + self._input_xp = xp + + # All data should use the same sycl queue + if use_raw_input and sua_iface: + queue = X.sycl_queue + + if not use_raw_input or True: + X = _check_array(X, dtype=[np.float64, np.float32], ensure_2d=True) + n_samples, n_features = X.shape first_pass = not hasattr(self, "components_") @@ -189,13 +205,46 @@ def finalize_fit(self, queue=None): self._params, self._partial_result, ) - self.mean_ = from_table(result.means).ravel() - self.var_ = from_table(result.variances).ravel() - self.components_ = from_table(result.eigenvectors) - self.singular_values_ = np.nan_to_num(from_table(result.singular_values).ravel()) - self.explained_variance_ = np.maximum(from_table(result.eigenvalues).ravel(), 0) + self.mean_ = from_table( + result.means, + sua_iface=self._input_sua_iface, + sycl_queue=queue, + xp=self._input_xp, + ).ravel() + self.var_ = from_table( + result.variances, + sua_iface=self._input_sua_iface, + sycl_queue=queue, + xp=self._input_xp, + ).ravel() + self.components_ = from_table( + result.eigenvectors, + sua_iface=self._input_sua_iface, + sycl_queue=queue, + xp=self._input_xp, + ) + self.singular_values_ = np.nan_to_num( + from_table( + result.singular_values, + sua_iface=self._input_sua_iface, + sycl_queue=queue, + xp=self._input_xp, + ).ravel() + ) + self.explained_variance_ = np.maximum( + from_table( + result.eigenvalues, + sua_iface=self._input_sua_iface, + sycl_queue=queue, + xp=self._input_xp, + ).ravel(), + 0, + ) self.explained_variance_ratio_ = from_table( - result.explained_variances_ratio + result.explained_variances_ratio, + sua_iface=self._input_sua_iface, + sycl_queue=queue, + xp=self._input_xp, ).ravel() self.noise_variance_ = self._compute_noise_variance( self.n_components_, min(self.n_samples_seen_, self.n_features_in_) diff --git a/sklearnex/covariance/tests/test_incremental_covariance.py b/sklearnex/covariance/tests/test_incremental_covariance.py index 0b44c2de7d..1607079236 100644 
--- a/sklearnex/covariance/tests/test_incremental_covariance.py +++ b/sklearnex/covariance/tests/test_incremental_covariance.py @@ -32,6 +32,7 @@ _convert_to_dataframe, get_dataframes_and_queues, ) +from sklearnex import config_context @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -115,8 +116,9 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, batch_size, dtype): @pytest.mark.parametrize("row_count", [100, 1000]) @pytest.mark.parametrize("column_count", [10, 100]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("use_raw_input", [True, False]) def test_sklearnex_partial_fit_on_random_data( - dataframe, queue, num_batches, row_count, column_count, dtype + dataframe, queue, num_batches, row_count, column_count, dtype, use_raw_input ): from sklearnex.covariance import IncrementalEmpiricalCovariance @@ -127,17 +129,18 @@ def test_sklearnex_partial_fit_on_random_data( X_split = np.array_split(X, num_batches) inccov = IncrementalEmpiricalCovariance() - for i in range(num_batches): - X_split_df = _convert_to_dataframe( - X_split[i], sycl_queue=queue, target_df=dataframe - ) - result = inccov.partial_fit(X_split_df) + with config_context(use_raw_input=use_raw_input): + for i in range(num_batches): + X_split_df = _convert_to_dataframe( + X_split[i], sycl_queue=queue, target_df=dataframe + ) + result = inccov.partial_fit(X_split_df) - expected_covariance = np.cov(X.T, bias=1) - expected_means = np.mean(X, axis=0) + expected_covariance = np.cov(X.T, bias=1) + expected_means = np.mean(X, axis=0) - assert_allclose(expected_covariance, result.covariance_, atol=1e-6) - assert_allclose(expected_means, result.location_, atol=1e-6) + assert_allclose(expected_covariance, result.covariance_, atol=1e-6) + assert_allclose(expected_means, result.location_, atol=1e-6) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) diff --git a/sklearnex/preview/decomposition/tests/test_incremental_pca.py 
b/sklearnex/preview/decomposition/tests/test_incremental_pca.py index 67929bfac8..f6047a4cdc 100644 --- a/sklearnex/preview/decomposition/tests/test_incremental_pca.py +++ b/sklearnex/preview/decomposition/tests/test_incremental_pca.py @@ -25,6 +25,7 @@ get_dataframes_and_queues, ) from sklearnex.preview.decomposition import IncrementalPCA +from sklearnex import config_context @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -245,8 +246,9 @@ def test_sklearnex_fit_transform_on_gold_data( @pytest.mark.parametrize("row_count", [100, 1000]) @pytest.mark.parametrize("column_count", [10, 100]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("use_raw_input", [True, False]) def test_sklearnex_partial_fit_on_random_data( - dataframe, queue, n_components, whiten, num_blocks, row_count, column_count, dtype + dataframe, queue, n_components, whiten, num_blocks, row_count, column_count, dtype, use_raw_input ): seed = 81 gen = np.random.default_rng(seed) @@ -254,13 +256,13 @@ def test_sklearnex_partial_fit_on_random_data( X = X.astype(dtype=dtype) X_split = np.array_split(X, num_blocks) incpca = IncrementalPCA(n_components=n_components, whiten=whiten) + with config_context(use_raw_input=use_raw_input): + for i in range(num_blocks): + X_split_df = _convert_to_dataframe( + X_split[i], sycl_queue=queue, target_df=dataframe + ) + incpca.partial_fit(X_split_df) - for i in range(num_blocks): - X_split_df = _convert_to_dataframe( - X_split[i], sycl_queue=queue, target_df=dataframe - ) - incpca.partial_fit(X_split_df) - - X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) - transformed_data = incpca.transform(X_df) - check_pca(incpca, dtype, whiten, X, transformed_data) + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + transformed_data = incpca.transform(X_df) + check_pca(incpca, dtype, whiten, X, transformed_data) From eaaab32dd808b190286e6678f7725544cd6ee0ae Mon Sep 17 
00:00:00 2001 From: Samir Nasibli Date: Thu, 7 Nov 2024 04:59:46 -0800 Subject: [PATCH 25/56] fix for RF regressor minor refactoring --- onedal/ensemble/forest.py | 8 ++++---- sklearnex/ensemble/_forest.py | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index 38457ceda0..da19649fc6 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -291,7 +291,7 @@ def _get_sample_weight(self, sample_weight, X): return sample_weight def _fit(self, X, y, sample_weight, module, queue): - use_raw_input = _get_config().get("use_raw_input", False) is True + use_raw_input = _get_config()["use_raw_input"] sua_iface, xp, _ = _get_sycl_namespace(X) # All data should use the same sycl queue @@ -399,7 +399,7 @@ def _create_model(self, module): def _predict(self, X, module, queue, hparams=None): _check_is_fitted(self) - use_raw_input = _get_config().get("use_raw_input", False) is True + use_raw_input = _get_config()["use_raw_input"] sua_iface, xp, _ = _get_sycl_namespace(X) # All data should use the same sycl queue @@ -430,7 +430,7 @@ def _predict(self, X, module, queue, hparams=None): def _predict_proba(self, X, module, queue): _check_is_fitted(self) - use_raw_input = _get_config().get("use_raw_input", False) is True + use_raw_input = _get_config()["use_raw_input"] sua_iface, xp, _ = _get_sycl_namespace(X) # All data should use the same sycl queue @@ -617,7 +617,7 @@ def __init__( ) def fit(self, X, y, sample_weight=None, queue=None): - use_raw_input = get_config().get("use_raw_input", False) is True + use_raw_input = _get_config()["use_raw_input"] # TODO: # check if required. 
if not use_raw_input: diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 7b65435788..455d41d9ae 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -81,7 +81,7 @@ class BaseForest(ABC): _onedal_factory = None def _onedal_fit(self, X, y, sample_weight=None, queue=None): - use_raw_input = get_config().get("use_raw_input", False) is True + use_raw_input = get_config()["use_raw_input"] xp, _ = get_namespace(X) if not use_raw_input: X, y = validate_data( @@ -813,7 +813,7 @@ def _onedal_gpu_supported(self, method_name, *data): def _onedal_predict(self, X, queue=None): xp, _ = get_namespace(X) - use_raw_input = get_config().get("use_raw_input", False) is True + use_raw_input = get_config()["use_raw_input"] if not use_raw_input: if sklearn_check_version("1.0"): X = validate_data( @@ -851,7 +851,7 @@ def _onedal_predict(self, X, queue=None): def _onedal_predict_proba(self, X, queue=None): xp, _ = get_namespace(X) - use_raw_input = get_config().get("use_raw_input", False) is True + use_raw_input = get_config()["use_raw_input"] if not use_raw_input: if sklearn_check_version("1.0"): X = validate_data( @@ -1158,6 +1158,7 @@ def _onedal_gpu_supported(self, method_name, *data): def _onedal_predict(self, X, queue=None): check_is_fitted(self, "_onedal_estimator") + use_raw_input = get_config()["use_raw_input"] if not use_raw_input: if sklearn_check_version("1.0"): From a7f0c2dc93b48a6fa6560b5eb6a2b280b3fcfe72 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 7 Nov 2024 10:48:07 -0800 Subject: [PATCH 26/56] fix workaround for knn --- onedal/_device_offload.py | 4 +++- onedal/neighbors/neighbors.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 5558d319f7..90a9bcf421 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -180,7 +180,9 @@ def support_input_format(freefunc=False, queue_param=True): def decorator(func): 
def wrapper_impl(obj, *args, **kwargs): - if _get_config()["use_raw_input"] is True: + # Check if the function is KNeighborsClassifier.fit + override_raw_input = obj.__class__.__name__ == "KNeighborsClassifier" and func.__name__ == "fit" + if _get_config()["use_raw_input"] is True and not override_raw_input: if "queue" not in kwargs: usm_iface = getattr(args[0], "__sycl_usm_array_interface__", None) data_queue = ( diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 4d4f58a0db..bd6e18d41c 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -596,7 +596,9 @@ def _onedal_fit(self, X, y, queue): train_alg_srch = self._get_backend("neighbors", "search", None) if gpu_device: - return train_alg_regr.train(policy, params, *to_table(X, y)).model + X_table = to_table(X, sua_iface=_get_sycl_namespace(X)[0]) + y_table = to_table(X, sua_iface=_get_sycl_namespace(y)[0]) + return train_alg_regr.train(policy, params, X_table, y_table).model return train_alg_srch.train(policy, params, to_table(X)).model def _onedal_predict(self, model, X, params, queue): From d9a29668f3aa4d4158d9b6413d3b1d8622a6c5a1 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 12 Nov 2024 13:22:29 -0800 Subject: [PATCH 27/56] kmeans predict support --- onedal/cluster/kmeans.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/onedal/cluster/kmeans.py b/onedal/cluster/kmeans.py index fea3bd1055..3bff2f7bdc 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -399,7 +399,9 @@ def _predict(self, X, module, queue=None, result_options=None): policy = self._get_policy(queue, X) X = _convert_to_supported(policy, X) - X_table, dtype = to_table(X), X.dtype + sua_iface = _get_sycl_namespace(X)[0] + dtype = X.dtype + X_table = to_table(X, sua_iface=sua_iface) params = self._get_onedal_params(is_csr, dtype, result_options) result = module.infer(policy, params, self.model_, X_table) From 
42c36148489224f1d948eb43182a55ac30535a50 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Mon, 16 Dec 2024 05:25:10 -0800 Subject: [PATCH 28/56] fix merge errors --- onedal/cluster/kmeans.py | 1 + onedal/covariance/covariance.py | 14 ++++----- onedal/decomposition/incremental_pca.py | 2 +- onedal/decomposition/pca.py | 13 ++------ onedal/ensemble/forest.py | 4 +-- .../linear_model/incremental_linear_model.py | 30 ++++++++----------- onedal/linear_model/linear_model.py | 7 ++--- onedal/linear_model/logistic_regression.py | 6 ---- onedal/utils/_array_api.py | 4 ++- 9 files changed, 28 insertions(+), 53 deletions(-) diff --git a/onedal/cluster/kmeans.py b/onedal/cluster/kmeans.py index 40bc739285..b8d35184c1 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -261,6 +261,7 @@ def _fit_backend( ) def _fit(self, X, module, queue=None): + policy = self._get_policy(queue, X) is_csr = _is_csr(X) use_raw_input = _get_config().get("use_raw_input") is True if use_raw_input and _get_sycl_namespace(X)[0] is not None: diff --git a/onedal/covariance/covariance.py b/onedal/covariance/covariance.py index fdad408bc5..84771a9318 100644 --- a/onedal/covariance/covariance.py +++ b/onedal/covariance/covariance.py @@ -23,6 +23,8 @@ from ..common._base import BaseEstimator from ..common.hyperparameters import get_hyperparameters from ..datatypes import from_table, to_table +from ..utils import _check_array +from ..utils._array_api import _get_sycl_namespace class BaseEmpiricalCovariance(BaseEstimator, metaclass=ABCMeta): @@ -117,20 +119,14 @@ def fit(self, X, y=None, queue=None): else: result = self._get_backend("covariance", None, "compute", policy, params, X) if daal_check_version((2024, "P", 1)) or (not self.bias): - self.covariance_ = from_table( - result.cov_matrix, sua_iface=sua_iface, sycl_queue=queue, xp=xp - ) + self.covariance_ = from_table(result.cov_matrix, sycl_queue=queue) else: self.covariance_ = ( - from_table( - result.cov_matrix, 
sua_iface=sua_iface, sycl_queue=queue, xp=xp - ) + from_table(result.cov_matrix, sycl_queue=queue) * (X.shape[0] - 1) / X.shape[0] ) - self.location_ = xp.reshape( - from_table(result.means, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1 - ) + self.location_ = xp.reshape(from_table(result.means, sycl_queue=queue), -1) return self diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index d2e9172304..c9dd3589ca 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -224,5 +224,5 @@ def finalize_fit(self, queue=None): self.noise_variance_ = self._compute_noise_variance( self.n_components_, min(self.n_samples_seen_, self.n_features_in_) ) - self._need_to_finalize = False + self._need_to_finalize = False return self diff --git a/onedal/decomposition/pca.py b/onedal/decomposition/pca.py index ce11623fa5..a87b04c5bc 100644 --- a/onedal/decomposition/pca.py +++ b/onedal/decomposition/pca.py @@ -24,6 +24,7 @@ from .._config import _get_config from ..common._base import BaseEstimator from ..datatypes import from_table, to_table +from ..utils._array_api import _get_sycl_namespace class BasePCA(BaseEstimator, metaclass=ABCMeta): @@ -129,23 +130,15 @@ def _create_model(self): return m def predict(self, X, queue=None): - sua_iface, xp, _ = _get_sycl_namespace(X) - if xp is None: - xp = np - use_raw_input = _get_config().get("use_raw_input") is True - if use_raw_input and sua_iface: - queue = X.sycl_queue - policy = self._get_policy(queue, X) model = self._create_model() X_table = to_table(X, queue=queue) params = self._get_onedal_params(X_table, stage="predict") - X_table = to_table(X, sua_iface=sua_iface) - - return self._get_backend( + result = self._get_backend( "decomposition", "dim_reduction", "infer", policy, params, model, X_table ) + return from_table(result.transformed_data) class PCA(BasePCA): diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index 
28649e2d15..5327dedecb 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -534,9 +534,7 @@ def predict(self, X, queue=None): -1, ) - # return xp.take(self.classes_, pred.astype(xp.int64, casting="unsafe")) - pred = xp.astype(pred, xp.int64) - return xp.take(self.classes_, pred) + return xp.take(self.classes_, pred.astype(xp.int64, casting="unsafe")) def predict_proba(self, X, queue=None): hparams = get_hyperparameters("decision_forest", "infer") diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index d48182b134..4e3021e07a 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -16,8 +16,6 @@ import numpy as np -from daal4py.sklearn._utils import get_dtype - from .._config import _get_config from ..common.hyperparameters import get_hyperparameters from ..datatypes import from_table, to_table @@ -74,11 +72,8 @@ def partial_fit(self, X, y, queue=None): self : object Returns the instance itself. 
""" - module = self._get_backend("linear_model", "regression") - - self._sua_iface, self._xp, _ = _get_sycl_namespace(X, y) - if self._xp is None: - self._xp = np + if not hasattr(self, "_params"): + self._params = self._get_onedal_params(X.dtype) use_raw_input = _get_config().get("use_raw_input") is True if use_raw_input and self._sua_iface is not None: @@ -93,17 +88,17 @@ def partial_fit(self, X, y, queue=None): ) y = np.asarray(y, dtype=X.dtype) + X_table, y_table = to_table(X, y, queue=queue) + + module = self._get_backend("linear_model", "regression") + + self._sua_iface, self._xp, _ = _get_sycl_namespace(X, y) + self._queue = queue policy = self._get_policy(queue, X) self.n_features_in_ = _num_features(X, fallback_1d=True) - X_table, y_table = to_table(X, y, queue=queue) - - if not hasattr(self, "_dtype"): - self._dtype = X_table.dtype - self._params = self._get_onedal_params(self._dtype) - hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: self._partial_result = module.partial_train( @@ -220,6 +215,9 @@ def partial_fit(self, X, y, queue=None): self : object Returns the instance itself. 
""" + if not hasattr(self, "_params"): + self._params = self._get_onedal_params(X.dtype) + module = self._get_backend("linear_model", "regression") self._sua_iface, self._xp, _ = _get_sycl_namespace(X) @@ -240,16 +238,12 @@ def partial_fit(self, X, y, queue=None): accept_2d_y=True, force_all_finite=False, ) - y = np.asarray(y, dtype=self._dtype) + y = np.asarray(y, dtype=X.dtype) self.n_features_in_ = _num_features(X, fallback_1d=True) X_table, y_table = to_table(X, y, queue=queue) - if not hasattr(self, "_dtype"): - self._dtype = X_table.dtype - self._params = self._get_onedal_params(self._dtype) - self._partial_result = module.partial_train( policy, self._params, self._partial_result, X_table, y_table ) diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index 0de63f8672..6fc5d2ce33 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -122,8 +122,6 @@ def predict(self, X, queue=None): _check_is_fitted(self) sua_iface, xp, _ = _get_sycl_namespace(X) - if xp is None: - xp = np use_raw_input = _get_config().get("use_raw_input") is True policy = self._get_policy(queue, X) @@ -203,8 +201,6 @@ def fit(self, X, y, queue=None): module = self._get_backend("linear_model", "regression") sua_iface, xp, _ = _get_sycl_namespace(X) - if xp is None: - xp = np use_raw_input = _get_config().get("use_raw_input") is True if use_raw_input and sua_iface is not None: queue = X.sycl_queue @@ -309,6 +305,7 @@ def fit(self, X, y, queue=None): Fitted Estimator. 
""" module = self._get_backend("linear_model", "regression") + _, xp, _ = _get_sycl_namespace(X) if not isinstance(X, np.ndarray): X = np.asarray(X) @@ -341,7 +338,7 @@ def fit(self, X, y, queue=None): self._onedal_model = result.model packed_coefficients = from_table( - result.model.packed_coefficients, sua_iface=sua_iface, sycl_queue=queue, xp=xp + result.model.packed_coefficients, sycl_queue=queue ) self.coef_, self.intercept_ = ( packed_coefficients[:, 1:], diff --git a/onedal/linear_model/logistic_regression.py b/onedal/linear_model/logistic_regression.py index c8f3aebfbf..147a3686b7 100644 --- a/onedal/linear_model/logistic_regression.py +++ b/onedal/linear_model/logistic_regression.py @@ -202,8 +202,6 @@ def _infer(self, X, module, queue, sua_iface): def _predict(self, X, module, queue): use_raw_input = _get_config().get("use_raw_input") is True sua_iface, xp, _ = _get_sycl_namespace(X) - if xp is None: - xp = np if use_raw_input and sua_iface is not None: queue = X.sycl_queue @@ -215,8 +213,6 @@ def _predict(self, X, module, queue): def _predict_proba(self, X, module, queue): use_raw_input = _get_config().get("use_raw_input") is True sua_iface, xp, _ = _get_sycl_namespace(X) - if xp is None: - xp = np if use_raw_input and sua_iface is not None: queue = X.sycl_queue @@ -228,8 +224,6 @@ def _predict_proba(self, X, module, queue): def _predict_log_proba(self, X, module, queue): _, xp, _ = _get_sycl_namespace(X) - if xp is None: - xp = np y_proba = self._predict_proba(X, module, queue) return xp.log(y_proba) diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 47da103da9..6daa8123a9 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -18,6 +18,8 @@ from collections.abc import Iterable +import numpy as np + from ._dpep_helpers import dpctl_available, dpnp_available if dpctl_available: @@ -78,4 +80,4 @@ def _get_sycl_namespace(*arrays): else: raise ValueError(f"SYCL type not recognized: {sua_iface}") - return 
sua_iface, None, False + return sua_iface, np, False From 53bcc7b476b6cf0cd545375b5f40a15b2dc7e7e2 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 17 Dec 2024 02:59:38 -0800 Subject: [PATCH 29/56] fix some tests --- .../incremental_basic_statistics.py | 1 + onedal/covariance/incremental_covariance.py | 4 +-- onedal/decomposition/incremental_pca.py | 5 ++-- sklearnex/_device_offload.py | 2 +- .../covariance/incremental_covariance.py | 1 - .../tests/test_incremental_covariance.py | 29 +++++++++++++++++-- .../tests/test_incremental_pca.py | 1 - 7 files changed, 32 insertions(+), 11 deletions(-) diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index a21bcf3316..896758cac0 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -84,6 +84,7 @@ def __getstate__(self): self.finalize_fit() data = self.__dict__.copy() data.pop("_queue", None) + data.pop("_input_xp", None) # module cannot be pickled return data diff --git a/onedal/covariance/incremental_covariance.py b/onedal/covariance/incremental_covariance.py index 60f2a778d2..51cc5d18d3 100644 --- a/onedal/covariance/incremental_covariance.py +++ b/onedal/covariance/incremental_covariance.py @@ -72,6 +72,7 @@ def __getstate__(self): self.finalize_fit() data = self.__dict__.copy() data.pop("_queue", None) + data.pop("_input_xp", None) # module cannot be pickled return data @@ -98,8 +99,8 @@ def partial_fit(self, X, y=None, queue=None): Returns the instance itself. """ # Saving input array namespace and sua_iface, that will be used in - sua_iface, xp, _ = _get_sycl_namespace(X) # finalize_fit. 
+ sua_iface, xp, _ = _get_sycl_namespace(X) self._input_sua_iface = sua_iface self._input_xp = xp @@ -110,7 +111,6 @@ def partial_fit(self, X, y=None, queue=None): X = _check_array(X, dtype=[np.float64, np.float32], ensure_2d=True) self._queue = queue - policy = self._get_policy(queue, X) X_table = to_table(X, queue=queue) diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index c9dd3589ca..e25343a981 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -113,7 +113,7 @@ def __getstate__(self): self.finalize_fit() data = self.__dict__.copy() data.pop("_queue", None) - + data.pop("_input_xp", None) # module cannot be pickled return data def partial_fit(self, X, queue): @@ -144,8 +144,7 @@ def partial_fit(self, X, queue): # All data should use the same sycl queue if use_raw_input and sua_iface: queue = X.sycl_queue - - if not use_raw_input or True: + if not use_raw_input: X = _check_array(X, dtype=[np.float64, np.float32], ensure_2d=True) n_samples, n_features = X.shape diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 76da097fe9..cc6d627fdc 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -58,7 +58,7 @@ def _get_backend(obj, queue, method_name, *data): def dispatch(obj, method_name, branches, *args, **kwargs): - if not get_config()["use_raw_input"] == True: + if get_config()["use_raw_input"] is not True: q = _get_global_queue() has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host(q, *kwargs.values()) diff --git a/sklearnex/covariance/incremental_covariance.py b/sklearnex/covariance/incremental_covariance.py index 89ed92b601..61e2cb7f51 100644 --- a/sklearnex/covariance/incremental_covariance.py +++ b/sklearnex/covariance/incremental_covariance.py @@ -184,7 +184,6 @@ def location_(self): ) def _onedal_partial_fit(self, X, queue=None, 
check_input=True): - first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0 # finite check occurs on onedal side diff --git a/sklearnex/covariance/tests/test_incremental_covariance.py b/sklearnex/covariance/tests/test_incremental_covariance.py index 4a57b2b5b8..faf8342aeb 100644 --- a/sklearnex/covariance/tests/test_incremental_covariance.py +++ b/sklearnex/covariance/tests/test_incremental_covariance.py @@ -122,14 +122,37 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, batch_size, dtype): assert_allclose(expected_means, result.location_) -@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues(dataframe_filter_="numpy,dpnp,dpctl") +) @pytest.mark.parametrize("num_batches", [2, 10]) @pytest.mark.parametrize("row_count", [100, 1000]) @pytest.mark.parametrize("column_count", [10, 100]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("use_raw_input", [True, False]) -def test_sklearnex_partial_fit_on_random_data( +def test_sklearnex_partial_fit_on_random_data_raw( dataframe, queue, num_batches, row_count, column_count, dtype, use_raw_input +): + run_test_test_sklearnex_partial_fit_on_random_data( + dataframe, queue, num_batches, row_count, column_count, dtype, use_raw_input + ) + + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +@pytest.mark.parametrize("num_batches", [2, 10]) +@pytest.mark.parametrize("row_count", [100, 1000]) +@pytest.mark.parametrize("column_count", [10, 100]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_sklearnex_partial_fit_on_random_data( + dataframe, queue, num_batches, row_count, column_count, dtype +): + run_test_test_sklearnex_partial_fit_on_random_data( + dataframe, queue, num_batches, row_count, column_count, dtype + ) + + +def run_test_test_sklearnex_partial_fit_on_random_data( + dataframe, queue, num_batches, row_count, 
column_count, dtype, use_raw_input=False ): from sklearnex.covariance import IncrementalEmpiricalCovariance @@ -145,7 +168,7 @@ def test_sklearnex_partial_fit_on_random_data( X_split_df = _convert_to_dataframe( X_split[i], sycl_queue=queue, target_df=dataframe ) - result = inccov.partial_fit(X_split_df) + result = inccov.partial_fit(X_split_df, check_input=not use_raw_input) expected_covariance = np.cov(X.T, bias=1) expected_means = np.mean(X, axis=0) diff --git a/sklearnex/preview/decomposition/tests/test_incremental_pca.py b/sklearnex/preview/decomposition/tests/test_incremental_pca.py index 0a9366ea87..c7c14e41c8 100644 --- a/sklearnex/preview/decomposition/tests/test_incremental_pca.py +++ b/sklearnex/preview/decomposition/tests/test_incremental_pca.py @@ -246,7 +246,6 @@ def test_sklearnex_fit_transform_on_gold_data( @pytest.mark.parametrize("row_count", [100, 1000]) @pytest.mark.parametrize("column_count", [10, 100]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) def test_sklearnex_partial_fit_on_random_data( dataframe, queue, From 9964c5abbd5cac73a7aa79712f423d6af25b3c47 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 17 Dec 2024 04:39:38 -0800 Subject: [PATCH 30/56] fixup --- .../preview/decomposition/tests/test_incremental_pca.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sklearnex/preview/decomposition/tests/test_incremental_pca.py b/sklearnex/preview/decomposition/tests/test_incremental_pca.py index c7c14e41c8..daf9b6e293 100644 --- a/sklearnex/preview/decomposition/tests/test_incremental_pca.py +++ b/sklearnex/preview/decomposition/tests/test_incremental_pca.py @@ -255,7 +255,6 @@ def test_sklearnex_partial_fit_on_random_data( row_count, column_count, dtype, - use_raw_input, ): seed = 81 gen = np.random.default_rng(seed) @@ -263,13 +262,6 @@ def test_sklearnex_partial_fit_on_random_data( X = X.astype(dtype=dtype) X_split = np.array_split(X, num_blocks) incpca = 
IncrementalPCA(n_components=n_components, whiten=whiten) - with config_context(use_raw_input=use_raw_input): - for i in range(num_blocks): - X_split_df = _convert_to_dataframe( - X_split[i], sycl_queue=queue, target_df=dataframe - ) - incpca.partial_fit(X_split_df) - for i in range(num_blocks): X_split_df = _convert_to_dataframe( X_split[i], sycl_queue=queue, target_df=dataframe From 84afb62936abbbd2d5a83527cb12c8a7265ad38f Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 17 Dec 2024 05:36:22 -0800 Subject: [PATCH 31/56] undo more changes that broke tests --- onedal/ensemble/forest.py | 36 +++++--------------- sklearnex/ensemble/_forest.py | 63 +++++++++++++++++------------------ 2 files changed, 38 insertions(+), 61 deletions(-) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index 5327dedecb..ddb214b010 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -330,20 +330,10 @@ def _fit(self, X, y, sample_weight, module, queue): if self.oob_score: if isinstance(self, ClassifierMixin): - # self.oob_score_ = from_table(train_result.oob_err_accuracy).item() - self.oob_score_ = from_table( - train_result.oob_err_accuracy, - sua_iface=sua_iface, - sycl_queue=queue, - xp=xp, - )[0] - + self.oob_score_ = from_table(train_result.oob_err_accuracy).item() self.oob_decision_function_ = from_table( - train_result.oob_err_decision_function, - sua_iface=sua_iface, - sycl_queue=queue, - xp=xp, - )[0] + train_result.oob_err_decision_function + ) if xp.any(self.oob_decision_function_ == 0): warnings.warn( "Some inputs do not have OOB scores. 
This probably means " @@ -352,21 +342,11 @@ def _fit(self, X, y, sample_weight, module, queue): UserWarning, ) else: - # self.oob_score_ = from_table(train_result.oob_err_r2).item() - self.oob_score_ = from_table( - train_result.oob_err_r2, sua_iface=sua_iface, sycl_queue=queue, xp=xp - )[0] - # self.oob_prediction_ = from_table(train_result.oob_err_prediction).reshape(-1) - self.oob_score_ = xp.reshape( - from_table( - train_result.oob_err_r2, - sua_iface=sua_iface, - sycl_queue=queue, - xp=xp, - ), - -1, - ) - if xp.any(self.oob_prediction_ == 0): + self.oob_score_ = from_table(train_result.oob_err_r2).item() + self.oob_prediction_ = from_table( + train_result.oob_err_prediction + ).reshape(-1) + if np.any(self.oob_prediction_ == 0): warnings.warn( "Some inputs do not have OOB scores. This probably means " "too few trees were used to compute any reliable OOB " diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 455d41d9ae..ab62c219a4 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -812,42 +812,39 @@ def _onedal_gpu_supported(self, method_name, *data): return patching_status def _onedal_predict(self, X, queue=None): - xp, _ = get_namespace(X) - use_raw_input = get_config()["use_raw_input"] - if not use_raw_input: - if sklearn_check_version("1.0"): - X = validate_data( - self, + if sklearn_check_version("1.0"): + X = validate_data( + self, + X, + dtype=[np.float64, np.float32], + force_all_finite=False, + reset=False, + ensure_2d=True, + ) + else: + if not get_config()["use_raw_input"]: + X = check_array( X, dtype=[np.float64, np.float32], force_all_finite=False, - reset=False, - ensure_2d=True, - ) - # sklearn version < 1.0 is not supported - # else: - # X = check_array( - # X, - # dtype=[np.float64, np.float32], - # force_all_finite=False, - # ) # Warning, order of dtype matters - # if hasattr(self, "n_features_in_"): - # try: - # num_features = _num_features(X) - # except TypeError: - # num_features 
= _num_samples(X) - # if num_features != self.n_features_in_: - # raise ValueError( - # ( - # f"X has {num_features} features, " - # f"but {self.__class__.__name__} is expecting " - # f"{self.n_features_in_} features as input" - # ) - # ) - # self._check_n_features(X, reset=False) - res = xp.reshape(self._onedal_estimator.predict(X, queue=queue), -1) - res = xp.astype(res, xp.int64) - return xp.take(self.classes_, res) + ) # Warning, order of dtype matters + if hasattr(self, "n_features_in_"): + try: + num_features = _num_features(X) + except TypeError: + num_features = _num_samples(X) + if num_features != self.n_features_in_: + raise ValueError( + ( + f"X has {num_features} features, " + f"but {self.__class__.__name__} is expecting " + f"{self.n_features_in_} features as input" + ) + ) + self._check_n_features(X, reset=False) + + res = self._onedal_estimator.predict(X, queue=queue) + return np.take(self.classes_, res.ravel().astype(np.int64, casting="unsafe")) def _onedal_predict_proba(self, X, queue=None): xp, _ = get_namespace(X) From cf5b73629a9e8c0c1fc22bddc3450a97e073825d Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 17 Dec 2024 13:58:39 -0800 Subject: [PATCH 32/56] format --- onedal/_device_offload.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index cc3c16a043..6a89d43b81 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -163,13 +163,14 @@ def support_input_format(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): # Check if the function is KNeighborsClassifier.fit - override_raw_input = obj.__class__.__name__ == "KNeighborsClassifier" and func.__name__ == "fit" + override_raw_input = ( + obj.__class__.__name__ == "KNeighborsClassifier" + and func.__name__ == "fit" + ) if _get_config()["use_raw_input"] is True and not override_raw_input: if "queue" not in kwargs: usm_iface = getattr(args[0], 
"__sycl_usm_array_interface__", None) - data_queue = ( - usm_iface["syclobj"] if usm_iface is not None else None - ) + data_queue = usm_iface["syclobj"] if usm_iface is not None else None kwargs["queue"] = data_queue return _run_on_device(func, obj, *args, **kwargs) From 92393b9ecb7de2b353b10f27e57e38f568f1b41d Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 18 Dec 2024 01:07:20 -0800 Subject: [PATCH 33/56] restore original behavior when running without raw inputs --- onedal/linear_model/incremental_linear_model.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index 4e3021e07a..64a371cb23 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -221,8 +221,6 @@ def partial_fit(self, X, y, queue=None): module = self._get_backend("linear_model", "regression") self._sua_iface, self._xp, _ = _get_sycl_namespace(X) - if self._xp is None: - self._xp = np use_raw_input = _get_config().get("use_raw_input") is True if use_raw_input and self._sua_iface is not None: queue = X.sycl_queue @@ -272,12 +270,15 @@ def finalize_fit(self, queue=None): self._onedal_model = result.model - packed_coefficients = from_table( - result.model.packed_coefficients, - sua_iface=self._sua_iface, - sycl_queue=self._queue, - xp=self._xp, - ) + if _get_config().get("use_raw_input") is True: + packed_coefficients = from_table( + result.model.packed_coefficients, + sua_iface=self._sua_iface, + sycl_queue=self._queue, + xp=self._xp, + ) + else: + packed_coefficients = from_table(result.model.packed_coefficients) self.coef_, self.intercept_ = ( self._xp.squeeze(packed_coefficients[:, 1:]), self._xp.squeeze(packed_coefficients[:, 0]), From 13471e5d38026a928fc92496b636d117ad8f6daa Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 18 Dec 2024 03:06:48 -0800 Subject: [PATCH 34/56] restore original 
behavior when running without raw inputs --- onedal/linear_model/incremental_linear_model.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index 64a371cb23..95025ee802 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -146,12 +146,15 @@ def finalize_fit(self, queue=None): self._onedal_model = result.model - packed_coefficients = from_table( - result.model.packed_coefficients, - sua_iface=self._sua_iface, - sycl_queue=self._queue, - xp=self._xp, - ) + if _get_config().get("use_raw_input") is True: + packed_coefficients = from_table( + result.model.packed_coefficients, + sua_iface=self._sua_iface, + sycl_queue=self._queue, + xp=self._xp, + ) + else: + packed_coefficients = from_table(result.model.packed_coefficients) self.coef_, self.intercept_ = ( self._xp.squeeze(packed_coefficients[:, 1:]), self._xp.squeeze(packed_coefficients[:, 0]), From a8f3f196d040b99429a89e2cfc48ff4695dbd233 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 18 Dec 2024 04:48:20 -0800 Subject: [PATCH 35/56] align code --- .../linear_model/incremental_linear_model.py | 24 ++++++++++++------- onedal/linear_model/linear_model.py | 4 +++- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index 95025ee802..03911751fa 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -153,12 +153,16 @@ def finalize_fit(self, queue=None): sycl_queue=self._queue, xp=self._xp, ) + self.coef_, self.intercept_ = ( + self._xp.squeeze(packed_coefficients[:, 1:]), + self._xp.squeeze(packed_coefficients[:, 0]), + ) else: packed_coefficients = from_table(result.model.packed_coefficients) - self.coef_, self.intercept_ = ( - 
self._xp.squeeze(packed_coefficients[:, 1:]), - self._xp.squeeze(packed_coefficients[:, 0]), - ) + self.coef_, self.intercept_ = ( + packed_coefficients[:, 1:].squeeze(), + packed_coefficients[:, 0].squeeze(), + ) return self @@ -280,11 +284,15 @@ def finalize_fit(self, queue=None): sycl_queue=self._queue, xp=self._xp, ) + self.coef_, self.intercept_ = ( + self._xp.squeeze(packed_coefficients[:, 1:]), + self._xp.squeeze(packed_coefficients[:, 0]), + ) else: packed_coefficients = from_table(result.model.packed_coefficients) - self.coef_, self.intercept_ = ( - self._xp.squeeze(packed_coefficients[:, 1:]), - self._xp.squeeze(packed_coefficients[:, 0]), - ) + self.coef_, self.intercept_ = ( + packed_coefficients[:, 1:].squeeze(), + packed_coefficients[:, 0].squeeze(), + ) return self diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index 6fc5d2ce33..fdb8434089 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -304,6 +304,8 @@ def fit(self, X, y, queue=None): self : object Fitted Estimator. 
""" + sua_iface, xp, _ = _get_sycl_namespace(X) + module = self._get_backend("linear_model", "regression") _, xp, _ = _get_sycl_namespace(X) @@ -338,7 +340,7 @@ def fit(self, X, y, queue=None): self._onedal_model = result.model packed_coefficients = from_table( - result.model.packed_coefficients, sycl_queue=queue + result.model.packed_coefficients, sua_iface=sua_iface, sycl_queue=queue, xp=xp ) self.coef_, self.intercept_ = ( packed_coefficients[:, 1:], From 2b07c000357749b8b9266844e39b1d058991d40d Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 18 Dec 2024 23:50:07 -0800 Subject: [PATCH 36/56] restore original from_table --- onedal/covariance/covariance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/covariance/covariance.py b/onedal/covariance/covariance.py index 84771a9318..1df23f2797 100644 --- a/onedal/covariance/covariance.py +++ b/onedal/covariance/covariance.py @@ -127,6 +127,6 @@ def fit(self, X, y=None, queue=None): / X.shape[0] ) - self.location_ = xp.reshape(from_table(result.means, sycl_queue=queue), -1) + self.location_ = from_table(result.means).ravel() return self From 61047369f56b4652fde9f5e90329b421e419c55e Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 19 Dec 2024 00:39:28 -0800 Subject: [PATCH 37/56] add use_raw_input tests for incremental covariance --- sklearnex/conftest.py | 20 +++++++++++ .../tests/test_incremental_covariance.py | 34 +++++-------------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/sklearnex/conftest.py b/sklearnex/conftest.py index 4ecad5383b..bf35b6ad31 100644 --- a/sklearnex/conftest.py +++ b/sklearnex/conftest.py @@ -80,3 +80,23 @@ def with_array_api(): def without_allow_sklearn_after_onedal(): with config_context(allow_sklearn_after_onedal=False): yield + + +@pytest.fixture +def skip_unsupported_raw_input(request): + # lookup if use_raw_input and dataframe are used in the test + use_raw_input = ( + request.getfixturevalue("use_raw_input") + if 
"use_raw_input" in request.fixturenames + else False + ) + dataframe = ( + request.getfixturevalue("dataframe") + if "dataframe" in request.fixturenames + else None + ) + + # skip tests of unsupported dataframes when using use_raw_input=True + if use_raw_input is True and dataframe not in ["numpy", "dpnp", "dpctl"]: + pytest.skip(f"use_raw_input is not supported for {dataframe}") + yield diff --git a/sklearnex/covariance/tests/test_incremental_covariance.py b/sklearnex/covariance/tests/test_incremental_covariance.py index faf8342aeb..f84fc46171 100644 --- a/sklearnex/covariance/tests/test_incremental_covariance.py +++ b/sklearnex/covariance/tests/test_incremental_covariance.py @@ -122,37 +122,21 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, batch_size, dtype): assert_allclose(expected_means, result.location_) -@pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues(dataframe_filter_="numpy,dpnp,dpctl") -) -@pytest.mark.parametrize("num_batches", [2, 10]) -@pytest.mark.parametrize("row_count", [100, 1000]) -@pytest.mark.parametrize("column_count", [10, 100]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("use_raw_input", [True, False]) -def test_sklearnex_partial_fit_on_random_data_raw( - dataframe, queue, num_batches, row_count, column_count, dtype, use_raw_input -): - run_test_test_sklearnex_partial_fit_on_random_data( - dataframe, queue, num_batches, row_count, column_count, dtype, use_raw_input - ) - - @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @pytest.mark.parametrize("num_batches", [2, 10]) @pytest.mark.parametrize("row_count", [100, 1000]) @pytest.mark.parametrize("column_count", [10, 100]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("use_raw_input", [True, False]) def test_sklearnex_partial_fit_on_random_data( - dataframe, queue, num_batches, row_count, column_count, dtype -): - 
run_test_test_sklearnex_partial_fit_on_random_data( - dataframe, queue, num_batches, row_count, column_count, dtype - ) - - -def run_test_test_sklearnex_partial_fit_on_random_data( - dataframe, queue, num_batches, row_count, column_count, dtype, use_raw_input=False + skip_unsupported_raw_input, + dataframe, + queue, + num_batches, + row_count, + column_count, + dtype, + use_raw_input, ): from sklearnex.covariance import IncrementalEmpiricalCovariance From df03233b0c8c74caac847cce3dc75d930afaef42 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 19 Dec 2024 08:06:17 -0800 Subject: [PATCH 38/56] Add basic statistics testing --- sklearnex/_device_offload.py | 2 +- .../basic_statistics/basic_statistics.py | 16 ++++++++------ .../tests/test_basic_statistics.py | 21 ++++++++++++++----- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index cc6d627fdc..5d6d5ba374 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -58,7 +58,7 @@ def _get_backend(obj, queue, method_name, *data): def dispatch(obj, method_name, branches, *args, **kwargs): - if get_config()["use_raw_input"] is not True: + if get_config()["use_raw_input"] is False: q = _get_global_queue() has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host(q, *kwargs.values()) diff --git a/sklearnex/basic_statistics/basic_statistics.py b/sklearnex/basic_statistics/basic_statistics.py index da82e3bd82..c62d5e4a5c 100644 --- a/sklearnex/basic_statistics/basic_statistics.py +++ b/sklearnex/basic_statistics/basic_statistics.py @@ -23,6 +23,7 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version +from onedal._config import _get_config from onedal.basic_statistics import BasicStatistics as onedal_BasicStatistics from .._device_offload import dispatch @@ -179,13 +180,16 @@ def 
_onedal_fit(self, X, sample_weight=None, queue=None): if sklearn_check_version("1.2"): self._validate_params() - if sklearn_check_version("1.0"): - X = validate_data(self, X, dtype=[np.float64, np.float32], ensure_2d=False) - else: - X = check_array(X, dtype=[np.float64, np.float32]) + if _get_config()["use_raw_input"] is False: + if sklearn_check_version("1.0"): + X = validate_data( + self, X, dtype=[np.float64, np.float32], ensure_2d=False + ) + else: + X = check_array(X, dtype=[np.float64, np.float32]) - if sample_weight is not None: - sample_weight = _check_sample_weight(sample_weight, X) + if sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, X) onedal_params = { "result_options": self.result_options, diff --git a/sklearnex/basic_statistics/tests/test_basic_statistics.py b/sklearnex/basic_statistics/tests/test_basic_statistics.py index a5515f240d..e661a9fbc7 100644 --- a/sklearnex/basic_statistics/tests/test_basic_statistics.py +++ b/sklearnex/basic_statistics/tests/test_basic_statistics.py @@ -24,6 +24,7 @@ _convert_to_dataframe, get_dataframes_and_queues, ) +from sklearnex._config import config_context from sklearnex.basic_statistics import BasicStatistics @@ -96,8 +97,17 @@ def test_multiple_options_on_gold_data(dataframe, queue, weighted, dtype): @pytest.mark.parametrize("column_count", [10, 100]) @pytest.mark.parametrize("weighted", [True, False]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("use_raw_input", [True, False]) def test_single_option_on_random_data( - dataframe, queue, result_option, row_count, column_count, weighted, dtype + skip_unsupported_raw_input, + dataframe, + queue, + result_option, + row_count, + column_count, + weighted, + dtype, + use_raw_input, ): function, tols = options_and_tests[result_option] fp32tol, fp64tol = tols @@ -112,10 +122,11 @@ def test_single_option_on_random_data( weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe) 
basicstat = BasicStatistics(result_options=result_option) - if weighted: - result = basicstat.fit(X_df, sample_weight=weights_df) - else: - result = basicstat.fit(X_df) + with config_context(use_raw_input=use_raw_input): + if weighted: + result = basicstat.fit(X_df, sample_weight=weights_df) + else: + result = basicstat.fit(X_df) res = getattr(result, result_option) if weighted: From 8a166b7d57707bb1846192ad6ca650368e855fd3 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 19 Dec 2024 08:48:11 -0800 Subject: [PATCH 39/56] add incremental basic statistics --- .../incremental_basic_statistics.py | 6 ++++- .../test_incremental_basic_statistics.py | 27 +++++++++++-------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/sklearnex/basic_statistics/incremental_basic_statistics.py b/sklearnex/basic_statistics/incremental_basic_statistics.py index d1ddcd55dc..2cf24de285 100644 --- a/sklearnex/basic_statistics/incremental_basic_statistics.py +++ b/sklearnex/basic_statistics/incremental_basic_statistics.py @@ -21,6 +21,7 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version +from onedal._config import _get_config from onedal.basic_statistics import ( IncrementalBasicStatistics as onedal_IncrementalBasicStatistics, ) @@ -194,6 +195,9 @@ def _onedal_finalize_fit(self, queue=None): def _onedal_partial_fit(self, X, sample_weight=None, queue=None, check_input=True): first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0 + use_raw_input = _get_config()["use_raw_input"] + # never check input when using raw input + check_input &= use_raw_input is False if check_input: if sklearn_check_version("1.0"): X = validate_data( @@ -208,7 +212,7 @@ def _onedal_partial_fit(self, X, sample_weight=None, queue=None, check_input=Tru dtype=[np.float64, np.float32], ) - if sample_weight is not None: + if not use_raw_input and sample_weight is not None: sample_weight = 
_check_sample_weight(sample_weight, X) if first_pass: diff --git a/sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py b/sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py index c648537993..ffd1beb29a 100644 --- a/sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +++ b/sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py @@ -24,13 +24,17 @@ _convert_to_dataframe, get_dataframes_and_queues, ) +from sklearnex._config import config_context from sklearnex.basic_statistics import IncrementalBasicStatistics @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @pytest.mark.parametrize("weighted", [True, False]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_partial_fit_multiple_options_on_gold_data(dataframe, queue, weighted, dtype): +@pytest.mark.parametrize("use_raw_input", [True, False]) +def test_partial_fit_multiple_options_on_gold_data( + skip_unsupported_raw_input, dataframe, queue, weighted, dtype, use_raw_input +): X = np.array([[0, 0], [1, 1]]) X = X.astype(dtype=dtype) X_split = np.array_split(X, 2) @@ -40,17 +44,18 @@ def test_partial_fit_multiple_options_on_gold_data(dataframe, queue, weighted, d weights_split = np.array_split(weights, 2) incbs = IncrementalBasicStatistics() - for i in range(2): - X_split_df = _convert_to_dataframe( - X_split[i], sycl_queue=queue, target_df=dataframe - ) - if weighted: - weights_split_df = _convert_to_dataframe( - weights_split[i], sycl_queue=queue, target_df=dataframe + with config_context(use_raw_input=use_raw_input): + for i in range(2): + X_split_df = _convert_to_dataframe( + X_split[i], sycl_queue=queue, target_df=dataframe ) - result = incbs.partial_fit(X_split_df, sample_weight=weights_split_df) - else: - result = incbs.partial_fit(X_split_df) + if weighted: + weights_split_df = _convert_to_dataframe( + weights_split[i], sycl_queue=queue, target_df=dataframe + ) + result = incbs.partial_fit(X_split_df, 
sample_weight=weights_split_df) + else: + result = incbs.partial_fit(X_split_df) if weighted: expected_weighted_mean = np.array([0.25, 0.25]) From fb5f5faa98a6a5fc885c594ad90a792064aab174 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 19 Dec 2024 09:23:37 -0800 Subject: [PATCH 40/56] add dbscan --- onedal/cluster/dbscan.py | 37 +++++++------------------- sklearnex/cluster/dbscan.py | 11 ++++---- sklearnex/cluster/tests/test_dbscan.py | 11 +++++--- 3 files changed, 22 insertions(+), 37 deletions(-) diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index 89c47b271f..7581fb922d 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -60,48 +60,29 @@ def _get_onedal_params(self, dtype=np.float32): def _fit(self, X, y, sample_weight, module, queue): use_raw_input = _get_config().get("use_raw_input", False) is True - sua_iface, xp, _ = _get_sycl_namespace(X) - - # All data should use the same sycl queue - if use_raw_input and sua_iface is not None: - queue = X.sycl_queue - policy = self._get_policy(queue, X) + if not use_raw_input: X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) sample_weight = make2d(sample_weight) if sample_weight is not None else None X = make2d(X) X_table, sample_weight_table = to_table(X, sample_weight, queue=queue) - params = self._get_onedal_params(X_table.dtype) result = module.compute(policy, params, X_table, sample_weight_table) - self.labels_ = xp.reshape( - from_table(result.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp), -1 - ) + _, xp, _ = _get_sycl_namespace(X) + self.labels_ = from_table(result.responses, sycl_queue=queue).ravel() if ( result.core_observation_indices is not None and not result.core_observation_indices.kind == "empty" ): - self.core_sample_indices_ = xp.reshape( - from_table( - result.core_observation_indices, - sycl_queue=queue, - sua_iface=sua_iface, - xp=xp, - ), - -1, - ) + self.core_sample_indices_ = from_table( + 
result.core_observation_indices, + sycl_queue=queue, + ).ravel() else: - # TODO: - # self.core_sample_indices_ = _asarray([], xp, sycl_queue=queue, dtype=xp.int32) - if sua_iface: - self.core_sample_indices_ = xp.asarray( - [], sycl_queue=queue, dtype=xp.int32 - ) - else: - self.core_sample_indices_ = xp.asarray([], dtype=xp.int32) - self.components_ = xp.take(X, self.core_sample_indices_, axis=0) + self.core_sample_indices_ = np.array([], dtype=np.intc) + self.components_ = np.take(X, self.core_sample_indices_, axis=0) self.n_features_in_ = X.shape[1] return self diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index d4d67ee94b..6fc6a4dfcc 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -23,6 +23,7 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version +from onedal._config import _get_config from onedal.cluster import DBSCAN as onedal_DBSCAN from .._config import get_config @@ -90,8 +91,7 @@ def __init__( self.n_jobs = n_jobs def _onedal_fit(self, X, y, sample_weight=None, queue=None): - use_raw_input = get_config().get("use_raw_input", False) is True - if not use_raw_input: + if _get_config()["use_raw_input"] is False: if sklearn_check_version("1.0"): X = validate_data(self, X, force_all_finite=False) @@ -143,7 +143,6 @@ def _onedal_gpu_supported(self, method_name, *data): return self._onedal_supported(method_name, *data) def fit(self, X, y=None, sample_weight=None): - use_raw_input = get_config().get("use_raw_input", False) is True if sklearn_check_version("1.2"): self._validate_params() elif sklearn_check_version("1.1"): @@ -182,9 +181,9 @@ def fit(self, X, y=None, sample_weight=None): if self.eps <= 0.0: raise ValueError(f"eps == {self.eps}, must be > 0.0.") - if not use_raw_input: - if sample_weight is not None: - sample_weight = _check_sample_weight(sample_weight, X) + use_raw_input = get_config().get("use_raw_input", False) is True + if not 
use_raw_input and sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, X) dispatch( self, "fit", diff --git a/sklearnex/cluster/tests/test_dbscan.py b/sklearnex/cluster/tests/test_dbscan.py index a83b5b7cec..4d7bd13171 100755 --- a/sklearnex/cluster/tests/test_dbscan.py +++ b/sklearnex/cluster/tests/test_dbscan.py @@ -22,15 +22,20 @@ _convert_to_dataframe, get_dataframes_and_queues, ) +from sklearnex._config import config_context @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) -def test_sklearnex_import_dbscan(dataframe, queue): +@pytest.mark.parametrize("use_raw_input", [True, False]) +def test_sklearnex_import_dbscan( + skip_unsupported_raw_input, dataframe, queue, use_raw_input +): from sklearnex.cluster import DBSCAN - X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]]) + X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]], dtype=np.float32) X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) - dbscan = DBSCAN(eps=3, min_samples=2).fit(X) + with config_context(use_raw_input=use_raw_input): + dbscan = DBSCAN(eps=3, min_samples=2).fit(X) assert "sklearnex" in dbscan.__module__ result = dbscan.labels_ From 91384eda942e0e10529f5fafd3f35e69f38073cb Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 19 Dec 2024 23:50:17 -0800 Subject: [PATCH 41/56] add kmeans --- onedal/cluster/kmeans.py | 5 +-- sklearnex/cluster/k_means.py | 43 ++++++++++++++------------ sklearnex/cluster/tests/test_kmeans.py | 19 +++++++++--- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/onedal/cluster/kmeans.py b/onedal/cluster/kmeans.py index b8d35184c1..6d6a94324c 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -263,11 +263,8 @@ def _fit_backend( def _fit(self, X, module, queue=None): policy = self._get_policy(queue, X) is_csr = _is_csr(X) - use_raw_input = _get_config().get("use_raw_input") is True - if use_raw_input and _get_sycl_namespace(X)[0] is not 
None: - queue = X.sycl_queue - if not use_raw_input: + if _get_config()["use_raw_input"] is False: X = _check_array( X, dtype=[np.float64, np.float32], diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py index 4ba75ca5b8..6dddfd207f 100644 --- a/sklearnex/cluster/k_means.py +++ b/sklearnex/cluster/k_means.py @@ -17,6 +17,7 @@ import logging from daal4py.sklearn._utils import daal_check_version +from sklearnex._config import get_config if daal_check_version((2023, "P", 200)): @@ -156,20 +157,21 @@ def fit(self, X, y=None, sample_weight=None): return self def _onedal_fit(self, X, _, sample_weight, queue=None): - X = validate_data( - self, - X, - accept_sparse="csr", - dtype=[np.float64, np.float32], - order="C", - copy=self.copy_x, - accept_large_sparse=False, - ) + if get_config()["use_raw_input"] is False: + X = validate_data( + self, + X, + accept_sparse="csr", + dtype=[np.float64, np.float32], + order="C", + copy=self.copy_x, + accept_large_sparse=False, + ) - if sklearn_check_version("1.2"): - self._check_params_vs_input(X) - else: - self._check_params(X) + if sklearn_check_version("1.2"): + self._check_params_vs_input(X) + else: + self._check_params(X) self._n_features_out = self.n_clusters @@ -295,13 +297,14 @@ def predict( ) def _onedal_predict(self, X, sample_weight=None, queue=None): - X = validate_data( - self, - X, - accept_sparse="csr", - reset=False, - dtype=[np.float64, np.float32], - ) + if get_config()["use_raw_input"] is False: + X = validate_data( + self, + X, + accept_sparse="csr", + reset=False, + dtype=[np.float64, np.float32], + ) if not hasattr(self, "_onedal_estimator"): self._initialize_onedal_estimator() diff --git a/sklearnex/cluster/tests/test_kmeans.py b/sklearnex/cluster/tests/test_kmeans.py index ca18a07114..389a84043b 100755 --- a/sklearnex/cluster/tests/test_kmeans.py +++ b/sklearnex/cluster/tests/test_kmeans.py @@ -92,15 +92,21 @@ def test_sklearnex_import_for_sparse_data(queue, algorithm, init): 
@pytest.mark.parametrize( "algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"] ) -def test_results_on_dense_gold_data(dataframe, queue, algorithm): +@pytest.mark.parametrize("use_raw_input", [True, False]) +def test_results_on_dense_gold_data( + skip_unsupported_raw_input, dataframe, queue, algorithm, use_raw_input +): from sklearnex.cluster import KMeans - X_train = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]]) - X_test = np.array([[0, 0], [12, 3]]) + X_train = np.array( + [[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]], dtype=np.float32 + ) + X_test = np.array([[0, 0], [12, 3]], dtype=np.float32) X_train_df = _convert_to_dataframe(X_train, sycl_queue=queue, target_df=dataframe) X_test_df = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe) - kmeans = KMeans(n_clusters=2, random_state=0, algorithm=algorithm).fit(X_train_df) + with config_context(use_raw_input=use_raw_input): + kmeans = KMeans(n_clusters=2, random_state=0, algorithm=algorithm).fit(X_train_df) if queue and queue.sycl_device.is_gpu: # KMeans Init Dense GPU implementation is different from CPU @@ -112,7 +118,10 @@ def test_results_on_dense_gold_data(dataframe, queue, algorithm): expected_cluster_centers = np.array([[10.0, 2.0], [1.0, 2.0]], dtype=np.float32) expected_inertia = 16.0 - assert_allclose(expected_cluster_labels, _as_numpy(kmeans.predict(X_test_df))) + with config_context(use_raw_input=use_raw_input): + result = kmeans.predict(X_test_df) + + assert_allclose(expected_cluster_labels, _as_numpy(result)) assert_allclose(expected_cluster_centers, _as_numpy(kmeans.cluster_centers_)) assert expected_inertia == kmeans.inertia_ From 6dec57dc821379be9bf336175d3093117ca8588b Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 20 Dec 2024 01:25:14 -0800 Subject: [PATCH 42/56] add covariance --- onedal/covariance/covariance.py | 2 +- sklearnex/preview/covariance/covariance.py | 10 ++++---- .../covariance/tests/test_covariance.py | 23 
+++++++++++++++---- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/onedal/covariance/covariance.py b/onedal/covariance/covariance.py index 84771a9318..27045b9168 100644 --- a/onedal/covariance/covariance.py +++ b/onedal/covariance/covariance.py @@ -127,6 +127,6 @@ def fit(self, X, y=None, queue=None): / X.shape[0] ) - self.location_ = xp.reshape(from_table(result.means, sycl_queue=queue), -1) + self.location_ = from_table(result.means, sycl_queue=queue).ravel() return self diff --git a/sklearnex/preview/covariance/covariance.py b/sklearnex/preview/covariance/covariance.py index 04bdc0be8d..b92c9bb7e9 100644 --- a/sklearnex/preview/covariance/covariance.py +++ b/sklearnex/preview/covariance/covariance.py @@ -23,6 +23,7 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import daal_check_version, sklearn_check_version +from onedal._config import _get_config from onedal.common.hyperparameters import get_hyperparameters from onedal.covariance import EmpiricalCovariance as onedal_EmpiricalCovariance from sklearnex import config_context @@ -95,10 +96,11 @@ def _onedal_supported(self, method_name, *data): def fit(self, X, y=None): if sklearn_check_version("1.2"): self._validate_params() - if sklearn_check_version("0.23"): - X = validate_data(self, X, force_all_finite=False) - else: - X = check_array(X, force_all_finite=False) + if _get_config()["use_raw_input"] is False: + if sklearn_check_version("0.23"): + X = validate_data(self, X, force_all_finite=False) + else: + X = check_array(X, force_all_finite=False) dispatch( self, diff --git a/sklearnex/preview/covariance/tests/test_covariance.py b/sklearnex/preview/covariance/tests/test_covariance.py index 71eb9235c3..39c87dbb47 100644 --- a/sklearnex/preview/covariance/tests/test_covariance.py +++ b/sklearnex/preview/covariance/tests/test_covariance.py @@ -23,22 +23,33 @@ _convert_to_dataframe, get_dataframes_and_queues, ) +from sklearnex._config import config_context 
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @pytest.mark.parametrize("macro_block", [None, 1024]) @pytest.mark.parametrize("assume_centered", [True, False]) -def test_sklearnex_import_covariance(dataframe, queue, macro_block, assume_centered): +@pytest.mark.parametrize("use_raw_input", [True, False]) +def test_sklearnex_import_covariance( + skip_unsupported_raw_input, + dataframe, + queue, + macro_block, + assume_centered, + use_raw_input, +): from sklearnex.preview.covariance import EmpiricalCovariance - X = np.array([[0, 1], [0, 1]]) + X = np.array([[0, 1], [0, 1]], dtype=np.float32) X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) empcov = EmpiricalCovariance(assume_centered=assume_centered) if daal_check_version((2024, "P", 0)) and macro_block is not None: hparams = EmpiricalCovariance.get_hyperparameters("fit") hparams.cpu_macro_block = macro_block - result = empcov.fit(X) + + with config_context(use_raw_input=use_raw_input): + result = empcov.fit(X) expected_covariance = np.array([[0, 0], [0, 0]]) expected_means = np.array([0, 0]) @@ -51,10 +62,12 @@ def test_sklearnex_import_covariance(dataframe, queue, macro_block, assume_cente assert_allclose(expected_covariance, result.covariance_) assert_allclose(expected_means, result.location_) - X = np.array([[1, 2], [3, 6]]) + X = np.array([[1, 2], [3, 6]], dtype=np.float32) X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) - result = empcov.fit(X) + + with config_context(use_raw_input=use_raw_input): + result = empcov.fit(X) if assume_centered: expected_covariance = np.array([[5, 10], [10, 20]]) From 529a7b83ed96a6ef6122b84923b05c22f170f6d6 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 20 Dec 2024 01:41:48 -0800 Subject: [PATCH 43/56] align get_config() import and use_raw_input retrieval --- onedal/decomposition/incremental_pca.py | 2 +- sklearnex/basic_statistics/basic_statistics.py | 4 ++-- 
sklearnex/basic_statistics/incremental_basic_statistics.py | 4 ++-- sklearnex/cluster/dbscan.py | 3 +-- sklearnex/preview/covariance/covariance.py | 4 ++-- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index e25343a981..51dcbc410a 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -134,7 +134,7 @@ def partial_fit(self, X, queue): Returns the instance itself. """ - use_raw_input = _get_config().get("use_raw_input", False) + use_raw_input = _get_config()["use_raw_input"] sua_iface, xp, _ = _get_sycl_namespace(X) # Saving input array namespace and sua_iface, that will be used in # finalize_fit. diff --git a/sklearnex/basic_statistics/basic_statistics.py b/sklearnex/basic_statistics/basic_statistics.py index c62d5e4a5c..8caeb0f1ec 100644 --- a/sklearnex/basic_statistics/basic_statistics.py +++ b/sklearnex/basic_statistics/basic_statistics.py @@ -23,9 +23,9 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version -from onedal._config import _get_config from onedal.basic_statistics import BasicStatistics as onedal_BasicStatistics +from .._config import get_config from .._device_offload import dispatch from .._utils import IntelEstimator, PatchingConditionsChain @@ -180,7 +180,7 @@ def _onedal_fit(self, X, sample_weight=None, queue=None): if sklearn_check_version("1.2"): self._validate_params() - if _get_config()["use_raw_input"] is False: + if get_config()["use_raw_input"] is False: if sklearn_check_version("1.0"): X = validate_data( self, X, dtype=[np.float64, np.float32], ensure_2d=False diff --git a/sklearnex/basic_statistics/incremental_basic_statistics.py b/sklearnex/basic_statistics/incremental_basic_statistics.py index 2cf24de285..cb77f15d4d 100644 --- a/sklearnex/basic_statistics/incremental_basic_statistics.py +++ 
b/sklearnex/basic_statistics/incremental_basic_statistics.py @@ -21,11 +21,11 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version -from onedal._config import _get_config from onedal.basic_statistics import ( IncrementalBasicStatistics as onedal_IncrementalBasicStatistics, ) +from .._config import get_config from .._device_offload import dispatch from .._utils import IntelEstimator, PatchingConditionsChain @@ -195,7 +195,7 @@ def _onedal_finalize_fit(self, queue=None): def _onedal_partial_fit(self, X, sample_weight=None, queue=None, check_input=True): first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0 - use_raw_input = _get_config()["use_raw_input"] + use_raw_input = get_config()["use_raw_input"] # never check input when using raw input check_input &= use_raw_input is False if check_input: diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index 6fc6a4dfcc..ee8a8a2963 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -23,7 +23,6 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version -from onedal._config import _get_config from onedal.cluster import DBSCAN as onedal_DBSCAN from .._config import get_config @@ -91,7 +90,7 @@ def __init__( self.n_jobs = n_jobs def _onedal_fit(self, X, y, sample_weight=None, queue=None): - if _get_config()["use_raw_input"] is False: + if get_config()["use_raw_input"] is False: if sklearn_check_version("1.0"): X = validate_data(self, X, force_all_finite=False) diff --git a/sklearnex/preview/covariance/covariance.py b/sklearnex/preview/covariance/covariance.py index b92c9bb7e9..288cf3e353 100644 --- a/sklearnex/preview/covariance/covariance.py +++ b/sklearnex/preview/covariance/covariance.py @@ -23,12 +23,12 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import daal_check_version, 
sklearn_check_version -from onedal._config import _get_config from onedal.common.hyperparameters import get_hyperparameters from onedal.covariance import EmpiricalCovariance as onedal_EmpiricalCovariance from sklearnex import config_context from sklearnex.metrics import pairwise_distances +from ..._config import get_config from ..._device_offload import dispatch, wrap_output_data from ..._utils import PatchingConditionsChain, register_hyperparameters @@ -96,7 +96,7 @@ def _onedal_supported(self, method_name, *data): def fit(self, X, y=None): if sklearn_check_version("1.2"): self._validate_params() - if _get_config()["use_raw_input"] is False: + if get_config()["use_raw_input"] is False: if sklearn_check_version("0.23"): X = validate_data(self, X, force_all_finite=False) else: From 9f78cbddf73baa1fe3f58513f359e0a73c462f46 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 20 Dec 2024 02:23:21 -0800 Subject: [PATCH 44/56] add incremental_pca --- onedal/decomposition/incremental_pca.py | 1 - .../preview/decomposition/incremental_pca.py | 47 +++++++++++-------- .../tests/test_incremental_pca.py | 14 ++++-- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index 51dcbc410a..0513194a70 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -148,7 +148,6 @@ def partial_fit(self, X, queue): X = _check_array(X, dtype=[np.float64, np.float32], ensure_2d=True) n_samples, n_features = X.shape - first_pass = not hasattr(self, "components_") if first_pass: self.components_ = None diff --git a/sklearnex/preview/decomposition/incremental_pca.py b/sklearnex/preview/decomposition/incremental_pca.py index fdf13e0817..185a137087 100644 --- a/sklearnex/preview/decomposition/incremental_pca.py +++ b/sklearnex/preview/decomposition/incremental_pca.py @@ -22,6 +22,7 @@ from daal4py.sklearn._utils import sklearn_check_version from 
onedal.decomposition import IncrementalPCA as onedal_IncrementalPCA +from ..._config import get_config from ..._device_offload import dispatch, wrap_output_data from ..._utils import PatchingConditionsChain @@ -36,21 +37,22 @@ ) class IncrementalPCA(_sklearn_IncrementalPCA): + _need_to_finalize_attrs = { + "mean_", + "explained_variance_", + "explained_variance_ratio_", + "n_components_", + "components_", + "noise_variance_", + "singular_values_", + "var_", + } + def __init__(self, n_components=None, *, whiten=False, copy=True, batch_size=None): super().__init__( n_components=n_components, whiten=whiten, copy=copy, batch_size=batch_size ) self._need_to_finalize = False - self._need_to_finalize_attrs = { - "mean_", - "explained_variance_", - "explained_variance_ratio_", - "n_components_", - "components_", - "noise_variance_", - "singular_values_", - "var_", - } _onedal_incremental_pca = staticmethod(onedal_IncrementalPCA) @@ -68,6 +70,9 @@ def _onedal_fit_transform(self, X, queue=None): def _onedal_partial_fit(self, X, check_input=True, queue=None): first_pass = not hasattr(self, "_onedal_estimator") + use_raw_input = get_config()["use_raw_input"] + # never check input when using raw input + check_input &= use_raw_input is False if check_input: if sklearn_check_version("1.0"): X = validate_data( @@ -161,18 +166,20 @@ def _onedal_supported(self, method_name, *data): _onedal_gpu_supported = _onedal_supported def __getattr__(self, attr): - if attr in self._need_to_finalize_attrs: - if hasattr(self, "_onedal_estimator"): - if self._need_to_finalize: - self._onedal_finalize_fit() - return getattr(self._onedal_estimator, attr) - else: + # finalize the fit if requested attribute requires it + if attr in IncrementalPCA._need_to_finalize_attrs: + if "_onedal_estimator" not in self.__dict__: + # _onedal_estimator required to finalize the fit raise AttributeError( - f"'{self.__class__.__name__}' object has no attribute '{attr}'" + f"Requested postfit attribute '{attr}' before 
fitting the model." ) - if attr in self.__dict__: - return self.__dict__[attr] - + if self.__dict__["_need_to_finalize"]: + self._onedal_finalize_fit() + # join attributes of the class and the onedal_estimator to provide common interface + joined = self.__dict__ | self.__dict__.get("_onedal_estimator", {}).__dict__ + if attr in joined: + return joined[attr] + # raise AttributeError if attribute is neither in this class nor in _onedal_estimator raise AttributeError( f"'{self.__class__.__name__}' object has no attribute '{attr}'" ) diff --git a/sklearnex/preview/decomposition/tests/test_incremental_pca.py b/sklearnex/preview/decomposition/tests/test_incremental_pca.py index daf9b6e293..fb86a0d441 100644 --- a/sklearnex/preview/decomposition/tests/test_incremental_pca.py +++ b/sklearnex/preview/decomposition/tests/test_incremental_pca.py @@ -246,7 +246,9 @@ def test_sklearnex_fit_transform_on_gold_data( @pytest.mark.parametrize("row_count", [100, 1000]) @pytest.mark.parametrize("column_count", [10, 100]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("use_raw_input", [True, False]) def test_sklearnex_partial_fit_on_random_data( + skip_unsupported_raw_input, dataframe, queue, n_components, @@ -255,6 +257,7 @@ def test_sklearnex_partial_fit_on_random_data( row_count, column_count, dtype, + use_raw_input, ): seed = 81 gen = np.random.default_rng(seed) @@ -262,11 +265,12 @@ def test_sklearnex_partial_fit_on_random_data( X = X.astype(dtype=dtype) X_split = np.array_split(X, num_blocks) incpca = IncrementalPCA(n_components=n_components, whiten=whiten) - for i in range(num_blocks): - X_split_df = _convert_to_dataframe( - X_split[i], sycl_queue=queue, target_df=dataframe - ) - incpca.partial_fit(X_split_df) + with config_context(use_raw_input=use_raw_input): + for i in range(num_blocks): + X_split_df = _convert_to_dataframe( + X_split[i], sycl_queue=queue, target_df=dataframe + ) + incpca.partial_fit(X_split_df) X_df = 
_convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) transformed_data = incpca.transform(X_df) From 658ccc1e45bfde9440086c1f0cdeaacc2ed35e9a Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 20 Dec 2024 06:07:09 -0800 Subject: [PATCH 45/56] add pca --- sklearnex/decomposition/pca.py | 50 ++++++++++++++--------- sklearnex/decomposition/tests/test_pca.py | 15 ++++--- 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/sklearnex/decomposition/pca.py b/sklearnex/decomposition/pca.py index 143587aa16..17f0f5fc23 100755 --- a/sklearnex/decomposition/pca.py +++ b/sklearnex/decomposition/pca.py @@ -17,6 +17,7 @@ import logging from daal4py.sklearn._utils import daal_check_version +from sklearnex._config import get_config if daal_check_version((2024, "P", 100)): import numbers @@ -138,13 +139,23 @@ def _fit(self, X): ) def _onedal_fit(self, X, queue=None): - X = validate_data( - self, - X, - dtype=[np.float64, np.float32], - ensure_2d=True, - copy=self.copy, - ) + if get_config()["use_raw_input"] is True: + # With `use_raw_input=True` we never check for oneDAL compatibility and instead + # always dispatch to oneDAL. + # For this algorithm this means that `_is_solver_compatible_with_onedal()` + # never gets called, and therefore `self._fit_svd_solver` is not set. + # We therefore assert the solver compatibility here explicitly to set all + # variables correctly. 
+ assert self._is_solver_compatible_with_onedal(X.shape) + else: + # Compatibility is already asserted, continue with checking the provided data + X = validate_data( + self, + X, + dtype=[np.float64, np.float32], + ensure_2d=True, + copy=self.copy, + ) onedal_params = { "n_components": self.n_components, @@ -182,18 +193,19 @@ def transform(self, X): ) def _onedal_transform(self, X, queue=None): - if sklearn_check_version("1.0"): - X = validate_data( - self, - X, - dtype=[np.float64, np.float32], - reset=False, - ) - else: - X = check_array( - X, - dtype=[np.float64, np.float32], - ) + if get_config()["use_raw_input"] is False: + if sklearn_check_version("1.0"): + X = validate_data( + self, + X, + dtype=[np.float64, np.float32], + reset=False, + ) + else: + X = check_array( + X, + dtype=[np.float64, np.float32], + ) self._validate_n_features_in_after_fitting(X) return self._onedal_estimator.predict(X, queue=queue) diff --git a/sklearnex/decomposition/tests/test_pca.py b/sklearnex/decomposition/tests/test_pca.py index 5f8270d80c..2aa2ce04f3 100755 --- a/sklearnex/decomposition/tests/test_pca.py +++ b/sklearnex/decomposition/tests/test_pca.py @@ -24,13 +24,15 @@ _convert_to_dataframe, get_dataframes_and_queues, ) +from sklearnex._config import config_context @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) -def test_sklearnex_import(dataframe, queue): +@pytest.mark.parametrize("use_raw_input", [True, False]) +def test_sklearnex_import(skip_unsupported_raw_input, dataframe, queue, use_raw_input): from sklearnex.decomposition import PCA - X = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]] + X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float32) X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) X_transformed_expected = [ [-1.38340578, -0.2935787], @@ -42,9 +44,12 @@ def test_sklearnex_import(dataframe, queue): ] pca = PCA(n_components=2, svd_solver="covariance_eigh") - pca.fit(X) - 
X_transformed = pca.transform(X) - X_fit_transformed = PCA(n_components=2, svd_solver="covariance_eigh").fit_transform(X) + with config_context(use_raw_input=use_raw_input): + pca.fit(X) + X_transformed = pca.transform(X) + X_fit_transformed = PCA( + n_components=2, svd_solver="covariance_eigh" + ).fit_transform(X) if daal_check_version((2024, "P", 100)): assert "sklearnex" in pca.__module__ From 5e74a544861dedbe6f8464b2ded5da44ea26507b Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 20 Dec 2024 08:38:56 -0800 Subject: [PATCH 46/56] add incremental linear --- .../linear_model/incremental_linear_model.py | 38 ++++--------- sklearnex/linear_model/incremental_linear.py | 54 +++++++++---------- .../tests/test_incremental_linear.py | 15 +++++- 3 files changed, 50 insertions(+), 57 deletions(-) diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index 03911751fa..4eb4b86fcd 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -75,10 +75,7 @@ def partial_fit(self, X, y, queue=None): if not hasattr(self, "_params"): self._params = self._get_onedal_params(X.dtype) - use_raw_input = _get_config().get("use_raw_input") is True - if use_raw_input and self._sua_iface is not None: - queue = X.sycl_queue - if not use_raw_input: + if _get_config().get("use_raw_input") is False: X, y = _check_X_y( X, y, @@ -88,17 +85,15 @@ def partial_fit(self, X, y, queue=None): ) y = np.asarray(y, dtype=X.dtype) - X_table, y_table = to_table(X, y, queue=queue) - module = self._get_backend("linear_model", "regression") - self._sua_iface, self._xp, _ = _get_sycl_namespace(X, y) - - self._queue = queue policy = self._get_policy(queue, X) + queue = self._queue = getattr(policy, "_queue", None) self.n_features_in_ = _num_features(X, fallback_1d=True) + X_table, y_table = to_table(X, y, queue=queue) + hparams = get_hyperparameters("linear_regression", "train") if hparams is 
not None and not hparams.is_default: self._partial_result = module.partial_train( @@ -129,7 +124,6 @@ def finalize_fit(self, queue=None): self : object Returns the instance itself. """ - if queue is not None: policy = self._get_policy(queue) else: @@ -146,23 +140,13 @@ def finalize_fit(self, queue=None): self._onedal_model = result.model - if _get_config().get("use_raw_input") is True: - packed_coefficients = from_table( - result.model.packed_coefficients, - sua_iface=self._sua_iface, - sycl_queue=self._queue, - xp=self._xp, - ) - self.coef_, self.intercept_ = ( - self._xp.squeeze(packed_coefficients[:, 1:]), - self._xp.squeeze(packed_coefficients[:, 0]), - ) - else: - packed_coefficients = from_table(result.model.packed_coefficients) - self.coef_, self.intercept_ = ( - packed_coefficients[:, 1:].squeeze(), - packed_coefficients[:, 0].squeeze(), - ) + packed_coefficients = from_table( + result.model.packed_coefficients, sycl_queue=self._queue + ) + self.coef_, self.intercept_ = ( + packed_coefficients[:, 1:].squeeze(), + packed_coefficients[:, 0].squeeze(), + ) return self diff --git a/sklearnex/linear_model/incremental_linear.py b/sklearnex/linear_model/incremental_linear.py index db2d6549c0..98708b4821 100644 --- a/sklearnex/linear_model/incremental_linear.py +++ b/sklearnex/linear_model/incremental_linear.py @@ -28,6 +28,7 @@ from onedal.linear_model import ( IncrementalLinearRegression as onedal_IncrementalLinearRegression, ) +from sklearnex._config import get_config if sklearn_check_version("1.2"): from sklearn.utils._param_validation import Interval @@ -240,31 +241,31 @@ def _onedal_finalize_fit(self, queue=None): self._need_to_finalize = False def _onedal_fit(self, X, y, queue=None): - if sklearn_check_version("1.2"): - self._validate_params() - - if sklearn_check_version("1.0"): - X, y = validate_data( - self, - X, - y, - dtype=[np.float64, np.float32], - copy=self.copy_X, - multi_output=True, - ensure_2d=True, - ) - else: - X = check_array( - X, - 
dtype=[np.float64, np.float32], - copy=self.copy_X, - ) - y = check_array( - y, - dtype=[np.float64, np.float32], - copy=False, - ensure_2d=False, - ) + if get_config()["use_raw_input"] is False: + if sklearn_check_version("1.2"): + self._validate_params() + if sklearn_check_version("1.0"): + X, y = validate_data( + self, + X, + y, + dtype=[np.float64, np.float32], + copy=self.copy_X, + multi_output=True, + ensure_2d=True, + ) + else: + X = check_array( + X, + dtype=[np.float64, np.float32], + copy=self.copy_X, + ) + y = check_array( + y, + dtype=[np.float64, np.float32], + copy=False, + ensure_2d=False, + ) n_samples, n_features = X.shape @@ -283,9 +284,6 @@ def _onedal_fit(self, X, y, queue=None): X_batch, y_batch = X[batch], y[batch] self._onedal_partial_fit(X_batch, y_batch, check_input=False, queue=queue) - if sklearn_check_version("1.2"): - self._validate_params() - # finite check occurs on onedal side self.n_features_in_ = n_features diff --git a/sklearnex/linear_model/tests/test_incremental_linear.py b/sklearnex/linear_model/tests/test_incremental_linear.py index e4ab649daf..4c446690d2 100644 --- a/sklearnex/linear_model/tests/test_incremental_linear.py +++ b/sklearnex/linear_model/tests/test_incremental_linear.py @@ -23,6 +23,7 @@ _convert_to_dataframe, get_dataframes_and_queues, ) +from sklearnex._config import config_context from sklearnex.linear_model import IncrementalLinearRegression from sklearnex.tests.utils import _IS_INTEL @@ -31,7 +32,16 @@ @pytest.mark.parametrize("fit_intercept", [True, False]) @pytest.mark.parametrize("macro_block", [None, 1024]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_sklearnex_fit_on_gold_data(dataframe, queue, fit_intercept, macro_block, dtype): +@pytest.mark.parametrize("use_raw_input", [True, False]) +def test_sklearnex_fit_on_gold_data( + skip_unsupported_raw_input, + dataframe, + queue, + fit_intercept, + macro_block, + dtype, + use_raw_input, +): X = np.array([[1], [2]]) X = 
X.astype(dtype=dtype) X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) @@ -44,7 +54,8 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, fit_intercept, macro_block hparams = IncrementalLinearRegression.get_hyperparameters("fit") hparams.cpu_macro_block = macro_block hparams.gpu_macro_block = macro_block - inclin.fit(X_df, y_df) + with config_context(use_raw_input=use_raw_input): + inclin.fit(X_df, y_df) y_pred = inclin.predict(X_df) np_y_pred = _as_numpy(y_pred) From dfbf223adb688347cd5b1ccdcaa3d13e522c8d2a Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Sun, 22 Dec 2024 03:25:35 -0800 Subject: [PATCH 47/56] add linear_model --- onedal/linear_model/linear_model.py | 19 ++++++++----------- sklearnex/linear_model/linear.py | 10 ++++++---- sklearnex/linear_model/tests/test_linear.py | 15 +++++++++++++-- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index fdb8434089..9b9c1b5a8b 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -200,13 +200,7 @@ def fit(self, X, y, queue=None): """ module = self._get_backend("linear_model", "regression") - sua_iface, xp, _ = _get_sycl_namespace(X) - use_raw_input = _get_config().get("use_raw_input") is True - if use_raw_input and sua_iface is not None: - queue = X.sycl_queue - - if not use_raw_input: - # TODO Fix _check_X_y to make sure this conversion is there + if _get_config()["use_raw_input"] is False: if not isinstance(X, np.ndarray): X = np.asarray(X) @@ -220,12 +214,15 @@ def fit(self, X, y, queue=None): X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True) policy = self._get_policy(queue, X, y) + if _get_config()["use_raw_input"] is True: + # make sure we are using the queue from the on-device provided data + queue = getattr(policy, "_queue", queue) + + X_table, y_table = to_table(X, y, queue=queue) self.n_features_in_ = _num_features(X, 
fallback_1d=True) - X_table, y_table = to_table(X, y, queue=queue) params = self._get_onedal_params(X_table.dtype) - hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: result = module.train(policy, params, hparams.backend, X_table, y_table) @@ -235,7 +232,7 @@ def fit(self, X, y, queue=None): self._onedal_model = result.model packed_coefficients = from_table( - result.model.packed_coefficients, sua_iface=sua_iface, sycl_queue=queue, xp=xp + result.model.packed_coefficients, sycl_queue=queue ) self.coef_, self.intercept_ = ( packed_coefficients[:, 1:], @@ -243,7 +240,7 @@ def fit(self, X, y, queue=None): ) if self.coef_.shape[0] == 1 and y.ndim == 1: - self.coef_ = xp.reshape(self.coef_, (-1,)) + self.coef_ = np.reshape(self.coef_, (-1,)) self.intercept_ = self.intercept_[0] return self diff --git a/sklearnex/linear_model/linear.py b/sklearnex/linear_model/linear.py index fb7eca8cf1..7a5849d021 100644 --- a/sklearnex/linear_model/linear.py +++ b/sklearnex/linear_model/linear.py @@ -249,10 +249,12 @@ def _onedal_fit(self, X, y, sample_weight, queue=None): "y_numeric": True, "multi_output": supports_multi_output, } - if sklearn_check_version("1.0"): - X, y = validate_data(self, **check_params) - else: - X, y = check_X_y(**check_params) + + if get_config()["use_raw_input"] is False: + if sklearn_check_version("1.0"): + X, y = validate_data(self, **check_params) + else: + X, y = check_X_y(**check_params) if sklearn_check_version("1.0") and not sklearn_check_version("1.2"): self._normalize = _deprecate_normalize( diff --git a/sklearnex/linear_model/tests/test_linear.py b/sklearnex/linear_model/tests/test_linear.py index 128f5110dc..f9b4be26a3 100644 --- a/sklearnex/linear_model/tests/test_linear.py +++ b/sklearnex/linear_model/tests/test_linear.py @@ -26,6 +26,7 @@ _convert_to_dataframe, get_dataframes_and_queues, ) +from sklearnex._config import config_context from sklearnex.tests.utils import _IS_INTEL @@ 
-34,8 +35,16 @@ @pytest.mark.parametrize("macro_block", [None, 1024]) @pytest.mark.parametrize("overdetermined", [False, True]) @pytest.mark.parametrize("multi_output", [False, True]) +@pytest.mark.parametrize("use_raw_input", [True, False]) def test_sklearnex_import_linear( - dataframe, queue, dtype, macro_block, overdetermined, multi_output + skip_unsupported_raw_input, + dataframe, + queue, + dtype, + macro_block, + overdetermined, + multi_output, + use_raw_input, ): if (not overdetermined or multi_output) and not daal_check_version((2025, "P", 1)): pytest.skip("Functionality introduced in later versions") @@ -71,7 +80,9 @@ def test_sklearnex_import_linear( y_list = y.tolist() X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) - linreg.fit(X, y) + + with config_context(use_raw_input=use_raw_input): + linreg.fit(X, y) assert hasattr(linreg, "_onedal_estimator") assert "sklearnex" in linreg.__module__ From bb5206f9512a0eade1c86c86a19b14ba73405dd0 Mon Sep 17 00:00:00 2001 From: ethanglaser Date: Thu, 9 Jan 2025 18:59:37 +0000 Subject: [PATCH 48/56] raw inputs updates for functional forest predict --- onedal/ensemble/forest.py | 2 +- sklearnex/ensemble/_forest.py | 25 +++++++++++++------------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index ddb214b010..0ed80cdc35 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -514,7 +514,7 @@ def predict(self, X, queue=None): -1, ) - return xp.take(self.classes_, pred.astype(xp.int64, casting="unsafe")) + return xp.take(self.classes_, xp.astype(xp.reshape(pred, -1), xp.int64, casting="unsafe")) def predict_proba(self, X, queue=None): hparams = get_hyperparameters("decision_forest", "infer") diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index ab62c219a4..82ece5306a 100644 --- a/sklearnex/ensemble/_forest.py +++ 
b/sklearnex/ensemble/_forest.py @@ -812,17 +812,18 @@ def _onedal_gpu_supported(self, method_name, *data): return patching_status def _onedal_predict(self, X, queue=None): - if sklearn_check_version("1.0"): - X = validate_data( - self, - X, - dtype=[np.float64, np.float32], - force_all_finite=False, - reset=False, - ensure_2d=True, - ) - else: - if not get_config()["use_raw_input"]: + xp, _ = get_namespace(X) + if not get_config()["use_raw_input"]: + if sklearn_check_version("1.0"): + X = validate_data( + self, + X, + dtype=[np.float64, np.float32], + force_all_finite=False, + reset=False, + ensure_2d=True, + ) + else: X = check_array( X, dtype=[np.float64, np.float32], @@ -844,7 +845,7 @@ def _onedal_predict(self, X, queue=None): self._check_n_features(X, reset=False) res = self._onedal_estimator.predict(X, queue=queue) - return np.take(self.classes_, res.ravel().astype(np.int64, casting="unsafe")) + return xp.take(self.classes_, xp.astype(xp.reshape(res, -1), xp.int64, casting="unsafe")) def _onedal_predict_proba(self, X, queue=None): xp, _ = get_namespace(X) From 8211a239b2d23aa22cba8bdde76c4fdba009d728 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Fri, 17 Jan 2025 16:42:43 -0800 Subject: [PATCH 49/56] fixes for logreg predict_proba, knnreg, inc cov, inc pca --- onedal/linear_model/logistic_regression.py | 4 +- onedal/neighbors/neighbors.py | 17 ++++++--- .../covariance/incremental_covariance.py | 38 +++++++++++-------- .../preview/decomposition/incremental_pca.py | 26 +++++++------ 4 files changed, 50 insertions(+), 35 deletions(-) diff --git a/onedal/linear_model/logistic_regression.py b/onedal/linear_model/logistic_regression.py index 147a3686b7..1f5278fee5 100644 --- a/onedal/linear_model/logistic_regression.py +++ b/onedal/linear_model/logistic_regression.py @@ -219,8 +219,8 @@ def _predict_proba(self, X, module, queue): result = self._infer(X, module, queue, sua_iface) y = from_table(result.probabilities, sua_iface=sua_iface, sycl_queue=queue, xp=xp) - 
y = y.reshape(-1, 1) - return xp.hstack([1 - y, y]) + y = xp.reshape(y, (-1, 1)) + return xp.concat([1 - y, y], axis=0) def _predict_log_proba(self, X, module, queue): _, xp, _ = _get_sycl_namespace(X) diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 074a9c5ed4..1678d6ccb2 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -27,6 +27,7 @@ kdtree_knn_classification_prediction, kdtree_knn_classification_training, ) +from ..utils._array_api import _get_sycl_namespace from .._config import _get_config from ..common._base import BaseEstimator @@ -205,11 +206,14 @@ def _fit(self, X, y, queue): self, "effective_metric_params_", self.metric_params ) + _, xp, _ = _get_sycl_namespace(X) + use_raw_input = _get_config().get("use_raw_input", False) is True if y is not None or self.requires_y: shape = getattr(y, "shape", None) - X, y = super()._validate_data( - X, y, dtype=[np.float64, np.float32], accept_sparse="csr" - ) + if not use_raw_input: + X, y = super()._validate_data( + X, y, dtype=[np.float64, np.float32], accept_sparse="csr" + ) self._shape = shape if shape is not None else y.shape if _is_classifier(self): @@ -233,7 +237,7 @@ def _fit(self, X, y, queue): self._validate_n_classes() else: self._y = y - else: + elif not use_raw_input: X, _ = super()._validate_data(X, dtype=[np.float64, np.float32]) self.n_samples_fit_ = X.shape[0] @@ -261,7 +265,7 @@ def _fit(self, X, y, queue): result = self._onedal_fit(X, _fit_y, queue) if y is not None and _is_regressor(self): - self._y = y if self._shape is None else y.reshape(self._shape) + self._y = y if self._shape is None else xp.reshape(y, self._shape) self._onedal_model = result result = self @@ -625,7 +629,8 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None) return super()._kneighbors(X, n_neighbors, return_distance, queue=queue) def _predict_gpu(self, X, queue=None): - X = _check_array(X, accept_sparse="csr", dtype=[np.float64, 
np.float32]) + if _get_config()["use_raw_input"] is False: + X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) onedal_model = getattr(self, "_onedal_model", None) n_features = getattr(self, "n_features_in_", None) n_samples_fit_ = getattr(self, "n_samples_fit_", None) diff --git a/sklearnex/covariance/incremental_covariance.py b/sklearnex/covariance/incremental_covariance.py index 61e2cb7f51..25bede96de 100644 --- a/sklearnex/covariance/incremental_covariance.py +++ b/sklearnex/covariance/incremental_covariance.py @@ -32,6 +32,7 @@ ) from sklearnex import config_context +from .._config import get_config from .._device_offload import dispatch, wrap_output_data from .._utils import IntelEstimator, PatchingConditionsChain, register_hyperparameters from ..metrics import pairwise_distances @@ -186,6 +187,9 @@ def location_(self): def _onedal_partial_fit(self, X, queue=None, check_input=True): first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0 + use_raw_input = get_config()["use_raw_input"] + # never check input when using raw input + check_input &= use_raw_input is False # finite check occurs on onedal side if check_input: if sklearn_check_version("1.2"): @@ -333,23 +337,25 @@ def _onedal_fit(self, X, queue=None): if hasattr(self, "_onedal_estimator"): self._onedal_estimator._reset() - if sklearn_check_version("1.2"): - self._validate_params() + use_raw_input = get_config()["use_raw_input"] + if not use_raw_input: + if sklearn_check_version("1.2"): + self._validate_params() - # finite check occurs on onedal side - if sklearn_check_version("1.0"): - X = validate_data( - self, - X, - dtype=[np.float64, np.float32], - copy=self.copy, - force_all_finite=False, - ) - else: - X = check_array( - X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False - ) - self.n_features_in_ = X.shape[1] + # finite check occurs on onedal side + if sklearn_check_version("1.0"): + X = validate_data( + self, + X, + 
dtype=[np.float64, np.float32], + copy=self.copy, + force_all_finite=False, + ) + else: + X = check_array( + X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False + ) + self.n_features_in_ = X.shape[1] self.batch_size_ = self.batch_size if self.batch_size else 5 * self.n_features_in_ diff --git a/sklearnex/preview/decomposition/incremental_pca.py b/sklearnex/preview/decomposition/incremental_pca.py index 185a137087..92364490db 100644 --- a/sklearnex/preview/decomposition/incremental_pca.py +++ b/sklearnex/preview/decomposition/incremental_pca.py @@ -60,7 +60,9 @@ def _onedal_transform(self, X, queue=None): assert hasattr(self, "_onedal_estimator") if self._need_to_finalize: self._onedal_finalize_fit() - X = check_array(X, dtype=[np.float64, np.float32]) + use_raw_input = get_config()["use_raw_input"] + if not use_raw_input: + X = check_array(X, dtype=[np.float64, np.float32]) return self._onedal_estimator.predict(X, queue) def _onedal_fit_transform(self, X, queue=None): @@ -125,17 +127,19 @@ def _onedal_finalize_fit(self, queue=None): self._need_to_finalize = False def _onedal_fit(self, X, queue=None): - if sklearn_check_version("1.2"): - self._validate_params() + use_raw_input = get_config()["use_raw_input"] + if not use_raw_input: + if sklearn_check_version("1.2"): + self._validate_params() - if sklearn_check_version("1.0"): - X = validate_data(self, X, dtype=[np.float64, np.float32], copy=self.copy) - else: - X = check_array( - X, - dtype=[np.float64, np.float32], - copy=self.copy, - ) + if sklearn_check_version("1.0"): + X = validate_data(self, X, dtype=[np.float64, np.float32], copy=self.copy) + else: + X = check_array( + X, + dtype=[np.float64, np.float32], + copy=self.copy, + ) n_samples, n_features = X.shape From e3425bf2627334c6905c5c828bbe9ce1adf4f8d6 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 20 Jan 2025 10:31:48 -0800 Subject: [PATCH 50/56] dbscan + inc linreg changes --- onedal/cluster/dbscan.py | 4 ++- 
onedal/linear_model/linear_model.py | 3 +- sklearnex/linear_model/incremental_linear.py | 36 +++++++++++--------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index 7581fb922d..75c7c771a2 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -60,12 +60,14 @@ def _get_onedal_params(self, dtype=np.float32): def _fit(self, X, y, sample_weight, module, queue): use_raw_input = _get_config().get("use_raw_input", False) is True - policy = self._get_policy(queue, X) if not use_raw_input: X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) sample_weight = make2d(sample_weight) if sample_weight is not None else None X = make2d(X) + else: + queue = X.sycl_queue + policy = self._get_policy(queue, X) X_table, sample_weight_table = to_table(X, sample_weight, queue=queue) params = self._get_onedal_params(X_table.dtype) result = module.compute(policy, params, X_table, sample_weight_table) diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index 9b9c1b5a8b..f3308b7beb 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -123,7 +123,8 @@ def predict(self, X, queue=None): sua_iface, xp, _ = _get_sycl_namespace(X) use_raw_input = _get_config().get("use_raw_input") is True - + if use_raw_input: + queue = X.sycl_queue policy = self._get_policy(queue, X) if not use_raw_input: diff --git a/sklearnex/linear_model/incremental_linear.py b/sklearnex/linear_model/incremental_linear.py index 98708b4821..8ea65ff925 100644 --- a/sklearnex/linear_model/incremental_linear.py +++ b/sklearnex/linear_model/incremental_linear.py @@ -150,23 +150,24 @@ def _onedal_supported(self, method_name, *data): _onedal_gpu_supported = _onedal_supported def _onedal_predict(self, X, queue=None): - if sklearn_check_version("1.2"): - self._validate_params() + if get_config()["use_raw_input"] is False: + if sklearn_check_version("1.2"): 
+ self._validate_params() - if sklearn_check_version("1.0"): - X = validate_data( - self, - X, - dtype=[np.float64, np.float32], - copy=self.copy_X, - reset=False, - ) - else: - X = check_array( - X, - dtype=[np.float64, np.float32], - copy=self.copy_X, - ) + if sklearn_check_version("1.0"): + X = validate_data( + self, + X, + dtype=[np.float64, np.float32], + copy=self.copy_X, + reset=False, + ) + else: + X = check_array( + X, + dtype=[np.float64, np.float32], + copy=self.copy_X, + ) assert hasattr(self, "_onedal_estimator") if self._need_to_finalize: @@ -184,6 +185,9 @@ def _onedal_partial_fit(self, X, y, check_input=True, queue=None): if sklearn_check_version("1.2"): self._validate_params() + use_raw_input = get_config()["use_raw_input"] + # never check input when using raw input + check_input &= use_raw_input is False if check_input: if sklearn_check_version("1.0"): X, y = validate_data( From 52ba18ab54389351c1c6b253056be755498f890d Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 20 Jan 2025 14:33:26 -0800 Subject: [PATCH 51/56] black --- onedal/ensemble/forest.py | 4 +++- sklearnex/covariance/incremental_covariance.py | 5 ++++- sklearnex/ensemble/_forest.py | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index 0ed80cdc35..8cb77367a7 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -514,7 +514,9 @@ def predict(self, X, queue=None): -1, ) - return xp.take(self.classes_, xp.astype(xp.reshape(pred, -1), xp.int64, casting="unsafe")) + return xp.take( + self.classes_, xp.astype(xp.reshape(pred, -1), xp.int64, casting="unsafe") + ) def predict_proba(self, X, queue=None): hparams = get_hyperparameters("decision_forest", "infer") diff --git a/sklearnex/covariance/incremental_covariance.py b/sklearnex/covariance/incremental_covariance.py index 25bede96de..2dfb898615 100644 --- a/sklearnex/covariance/incremental_covariance.py +++ 
b/sklearnex/covariance/incremental_covariance.py @@ -353,7 +353,10 @@ def _onedal_fit(self, X, queue=None): ) else: X = check_array( - X, dtype=[np.float64, np.float32], copy=self.copy, force_all_finite=False + X, + dtype=[np.float64, np.float32], + copy=self.copy, + force_all_finite=False, ) self.n_features_in_ = X.shape[1] diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 82ece5306a..ff370b0558 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -845,7 +845,9 @@ def _onedal_predict(self, X, queue=None): self._check_n_features(X, reset=False) res = self._onedal_estimator.predict(X, queue=queue) - return xp.take(self.classes_, xp.astype(xp.reshape(res, -1), xp.int64, casting="unsafe")) + return xp.take( + self.classes_, xp.astype(xp.reshape(res, -1), xp.int64, casting="unsafe") + ) def _onedal_predict_proba(self, X, queue=None): xp, _ = get_namespace(X) From 90b717557f4aff1c73b66bb09cb27a8e60ac94f8 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 21 Jan 2025 11:38:54 -0800 Subject: [PATCH 52/56] temporary for CI --- onedal/ensemble/forest.py | 9 ++++++--- sklearnex/ensemble/_forest.py | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index 8cb77367a7..a00a2f25f7 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -514,9 +514,12 @@ def predict(self, X, queue=None): -1, ) - return xp.take( - self.classes_, xp.astype(xp.reshape(pred, -1), xp.int64, casting="unsafe") - ) + try: + return xp.take( + self.classes_, xp.astype(xp.reshape(pred, -1), xp.int64, casting="unsafe") + ) + except: + return np.take(self.classes_, pred.ravel().astype(np.int64, casting="unsafe")) def predict_proba(self, X, queue=None): hparams = get_hyperparameters("decision_forest", "infer") diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index ff370b0558..5cbadc69f0 100644 --- 
a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -845,9 +845,12 @@ def _onedal_predict(self, X, queue=None): self._check_n_features(X, reset=False) res = self._onedal_estimator.predict(X, queue=queue) - return xp.take( - self.classes_, xp.astype(xp.reshape(res, -1), xp.int64, casting="unsafe") - ) + try: + return xp.take( + self.classes_, xp.astype(xp.reshape(res, -1), xp.int64, casting="unsafe") + ) + except: + return np.take(self.classes_, res.ravel().astype(np.int64, casting="unsafe")) def _onedal_predict_proba(self, X, queue=None): xp, _ = get_namespace(X) From f4d18cd445254cf3a1859439e375d3b684ca003e Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 21 Jan 2025 11:41:15 -0800 Subject: [PATCH 53/56] isorted --- onedal/neighbors/neighbors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 1678d6ccb2..2ca55df0d1 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -27,7 +27,6 @@ kdtree_knn_classification_prediction, kdtree_knn_classification_training, ) -from ..utils._array_api import _get_sycl_namespace from .._config import _get_config from ..common._base import BaseEstimator @@ -42,6 +41,7 @@ _column_or_1d, _num_samples, ) +from ..utils._array_api import _get_sycl_namespace class NeighborsCommonBase(BaseEstimator, metaclass=ABCMeta): From d84a55987446f7930fd3f27206f83f6bc127d1c6 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 21 Jan 2025 16:15:23 -0800 Subject: [PATCH 54/56] tuple indices safeguarding --- onedal/_device_offload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 6a89d43b81..20aad22ecc 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -92,7 +92,7 @@ def _transfer_to_host(queue, *data): buffer = as_usm_memory(item).copy_to_host() order = "C" - if usm_iface["strides"] is not None: + if 
usm_iface["strides"] is not None and len(usm_iface["strides"]) > 1: if usm_iface["strides"][0] < usm_iface["strides"][1]: order = "F" item = np.ndarray( From 2daeeb7e179defbb8ea2574c8ba1fafce2f0e325 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 21 Jan 2025 16:15:35 -0800 Subject: [PATCH 55/56] incremental bs fit fixes --- .../incremental_basic_statistics.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sklearnex/basic_statistics/incremental_basic_statistics.py b/sklearnex/basic_statistics/incremental_basic_statistics.py index cb77f15d4d..cf5b6a30dc 100644 --- a/sklearnex/basic_statistics/incremental_basic_statistics.py +++ b/sklearnex/basic_statistics/incremental_basic_statistics.py @@ -232,16 +232,18 @@ def _onedal_partial_fit(self, X, sample_weight=None, queue=None, check_input=Tru self._need_to_finalize = True def _onedal_fit(self, X, sample_weight=None, queue=None): - if sklearn_check_version("1.2"): - self._validate_params() + use_raw_input = get_config()["use_raw_input"] + if not use_raw_input: + if sklearn_check_version("1.2"): + self._validate_params() - if sklearn_check_version("1.0"): - X = validate_data(self, X, dtype=[np.float64, np.float32]) - else: - X = check_array(X, dtype=[np.float64, np.float32]) + if sklearn_check_version("1.0"): + X = validate_data(self, X, dtype=[np.float64, np.float32]) + else: + X = check_array(X, dtype=[np.float64, np.float32]) - if sample_weight is not None: - sample_weight = _check_sample_weight(sample_weight, X) + if sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, X) n_samples, n_features = X.shape if self.batch_size is None: From fb3d0bc92f315314c2501495acd1249060169993 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 21 Jan 2025 16:21:14 -0800 Subject: [PATCH 56/56] dbscan CI fixes --- onedal/cluster/dbscan.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py 
index 75c7c771a2..3cf46b55b4 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -60,19 +60,19 @@ def _get_onedal_params(self, dtype=np.float32): def _fit(self, X, y, sample_weight, module, queue): use_raw_input = _get_config().get("use_raw_input", False) is True + sua_iface, _, _ = _get_sycl_namespace(X) if not use_raw_input: X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) sample_weight = make2d(sample_weight) if sample_weight is not None else None X = make2d(X) - else: + elif sua_iface is not None: queue = X.sycl_queue policy = self._get_policy(queue, X) X_table, sample_weight_table = to_table(X, sample_weight, queue=queue) params = self._get_onedal_params(X_table.dtype) result = module.compute(policy, params, X_table, sample_weight_table) - _, xp, _ = _get_sycl_namespace(X) self.labels_ = from_table(result.responses, sycl_queue=queue).ravel() if ( result.core_observation_indices is not None