diff --git a/tests/python-sycl/test_sycl_training_continuation.py b/tests/python-sycl/test_sycl_training_continuation.py
new file mode 100644
index 000000000000..e2a11c987bb4
--- /dev/null
+++ b/tests/python-sycl/test_sycl_training_continuation.py
@@ -0,0 +1,64 @@
+import numpy as np
+import xgboost as xgb
+import json
+
+rng = np.random.RandomState(1994)
+
+
+class TestSYCLTrainingContinuation:
+    """Check that resumed training on a SYCL device matches one-shot training."""
+
+    def run_training_continuation(self, use_json):
+        """Compare a 64-round model with a 32 + 32 round continued model.
+
+        ``use_json`` is passed through as the
+        ``enable_experimental_json_serialization`` training parameter.
+        """
+        n_rows = 64
+        n_cols = 32
+        # Draw from the seeded module-level generator so the data is reproducible.
+        X = rng.randn(n_rows, n_cols)
+        y = rng.randn(n_rows)
+        dtrain = xgb.DMatrix(X, y)
+        params = {
+            "device": "sycl",
+            "max_depth": "2",
+            "gamma": "0.1",
+            "alpha": "0.01",
+            "enable_experimental_json_serialization": use_json,
+        }
+        bst_0 = xgb.train(params, dtrain, num_boost_round=64)
+        dump_0 = bst_0.get_dump(dump_format="json")
+
+        bst_1 = xgb.train(params, dtrain, num_boost_round=32)
+        bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
+        dump_1 = bst_1.get_dump(dump_format="json")
+
+        def recursive_compare(obj_0, obj_1):
+            """Assert two decoded JSON values match; floats use np.isclose."""
+            if isinstance(obj_0, float):
+                assert np.isclose(obj_0, obj_1, atol=1e-6)
+            elif isinstance(obj_0, (str, int)):
+                assert obj_0 == obj_1
+            elif isinstance(obj_0, dict):
+                # Comparing the key lists also catches extra or missing keys.
+                assert list(obj_0) == list(obj_1)
+                for key, value_0 in obj_0.items():
+                    # The "missing" entry is deliberately left unchecked.
+                    if key != "missing":
+                        recursive_compare(value_0, obj_1[key])
+            else:
+                # Without the length check, trailing items in obj_1 would be ignored.
+                assert len(obj_0) == len(obj_1)
+                for item_0, item_1 in zip(obj_0, obj_1):
+                    recursive_compare(item_0, item_1)
+
+        assert len(dump_0) == len(dump_1)
+        for tree_0, tree_1 in zip(dump_0, dump_1):
+            recursive_compare(json.loads(tree_0), json.loads(tree_1))
+
+    def test_sycl_training_continuation_binary(self):
+        self.run_training_continuation(False)
+
+    def test_sycl_training_continuation_json(self):
+        self.run_training_continuation(True)
diff --git 
a/tests/python-sycl/test_sycl_updaters.py b/tests/python-sycl/test_sycl_updaters.py
new file mode 100644
index 000000000000..57ca8d783bd7
--- /dev/null
+++ b/tests/python-sycl/test_sycl_updaters.py
@@ -0,0 +1,89 @@
+import numpy as np
+import gc
+import pytest
+import xgboost as xgb
+from hypothesis import given, strategies, assume, settings, note
+
+import sys
+import os
+
+# sys.path.append("tests/python")
+# import testing as tm
+from xgboost import testing as tm
+
+parameter_strategy = strategies.fixed_dictionaries(
+    {
+        "max_depth": strategies.integers(0, 11),
+        "max_leaves": strategies.integers(0, 256),
+        "max_bin": strategies.integers(2, 1024),
+        "grow_policy": strategies.sampled_from(["lossguide", "depthwise"]),
+        "single_precision_histogram": strategies.booleans(),
+        "min_child_weight": strategies.floats(0.5, 2.0),
+        "seed": strategies.integers(0, 10),
+        # We cannot enable subsampling as the training loss can increase
+        # 'subsample': strategies.floats(0.5, 1.0),
+        "colsample_bytree": strategies.floats(0.5, 1.0),
+        "colsample_bylevel": strategies.floats(0.5, 1.0),
+    }
+).filter(
+    lambda x: (x["max_depth"] > 0 or x["max_leaves"] > 0)
+    and (x["max_depth"] > 0 or x["grow_policy"] == "lossguide")
+)
+
+
+def train_result(param, dmat, num_rounds):
+    """Train on ``dmat`` and return the evaluation history on that same data."""
+    result = {}
+    xgb.train(
+        param,
+        dmat,
+        num_rounds,
+        [(dmat, "train")],
+        verbose_eval=False,
+        evals_result=result,
+    )
+    return result
+
+
+class TestSYCLUpdaters:
+    """Training-loss checks for the hist updater on SYCL devices."""
+
+    @given(parameter_strategy, strategies.integers(1, 5), tm.make_dataset_strategy())
+    @settings(deadline=None)
+    def test_sycl_hist(self, param, num_rounds, dataset):
+        param["tree_method"] = "hist"
+        param["device"] = "sycl"
+        param["verbosity"] = 0
+        param = dataset.set_params(param)
+        result = train_result(param, dataset.get_dmat(), num_rounds)
+        note(result)
+        assert tm.non_increasing(result["train"][dataset.metric])
+
+    @given(tm.make_dataset_strategy(), strategies.integers(0, 1))
+    @settings(deadline=None)
+    def test_specified_device_id_sycl_update(self, dataset, device_id):
+        """Train on the ``device_id``-th GPU reported by ``sycl-ls``.
+
+        Nothing is trained when ``sycl-ls`` lists fewer GPUs than that.
+        """
+        # Read the list of SYCL devices; the context manager closes the pipe.
+        with os.popen("sycl-ls") as pipe:
+            devices = pipe.read().split("\n")
+
+        # The test should launch only on a GPU: find the GPUs in the list of
+        # devices and use the position in that list instead of device_id.
+        target_device_type = "opencl:gpu"
+        found_devices = 0
+        for idx, device in enumerate(devices):
+            # The type tag starts at index 1 of each entry (presumably after
+            # a leading "["), so skip the first character before matching.
+            if not device[1:].startswith(target_device_type):
+                continue
+            if found_devices == device_id:
+                param = {"device": f"sycl:gpu:{idx}"}
+                param = dataset.set_params(param)
+                result = train_result(param, dataset.get_dmat(), 10)
+                assert tm.non_increasing(result["train"][dataset.metric])
+                # Stop here; otherwise every later GPU would be trained too.
+                break
+            found_devices += 1
diff --git a/tests/python-sycl/test_sycl_with_sklearn.py b/tests/python-sycl/test_sycl_with_sklearn.py
new file mode 100644
index 000000000000..8e75e77f8cdc
--- /dev/null
+++ b/tests/python-sycl/test_sycl_with_sklearn.py
@@ -0,0 +1,38 @@
+import xgboost as xgb
+import pytest
+import sys
+import numpy as np
+
+from xgboost import testing as tm
+
+sys.path.append("tests/python")
+import test_with_sklearn as twskl  # noqa
+
+pytestmark = pytest.mark.skipif(**tm.no_sklearn())
+
+rng = np.random.RandomState(1994)
+
+
+def test_sycl_binary_classification():
+    """Cross-validate SYCL classifiers on two-class digits; require error < 10%."""
+    from sklearn.datasets import load_digits
+    from sklearn.model_selection import KFold
+
+    digits = load_digits(n_class=2)
+    y = digits["target"]
+    X = digits["data"]
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
+        for train_index, test_index in kf.split(X, y):
+            xgb_model = cls(random_state=42, device="sycl", n_estimators=4).fit(
+                X[train_index], y[train_index]
+            )
+            preds = xgb_model.predict(X[test_index])
+            labels = y[test_index]
+            err = sum(
+                1 for pred, label in zip(preds, labels) if int(pred > 0.5) != label
+            ) / float(len(preds))
+            print(preds)
+            print(labels)
+            print(err)
+            assert err < 0.1