return sycl python tests

razdoburdin · Jun 25, 2024 · 6de2c80 · 6de2c80
1 parent 59fd57b
commit 6de2c80
Show file tree

Hide file tree

Showing 3 changed files with 176 additions and 0 deletions.
diff --git a/tests/python-sycl/test_sycl_training_continuation.py b/tests/python-sycl/test_sycl_training_continuation.py
@@ -0,0 +1,59 @@
+import numpy as np
+import xgboost as xgb
+import json
+
+# Module-level NumPy RandomState created with the fixed seed 1994.
+rng = np.random.RandomState(1994)
+
+
+class TestSYCLTrainingContinuation:
+    """Compare one-shot training with continued training on a SYCL device."""
+
+    def run_training_continuation(self, use_json):
+        """Train 64 rounds at once and as 32 + 32 rounds, then compare dumps.
+
+        Parameters
+        ----------
+        use_json :
+            Value forwarded as the ``enable_experimental_json_serialization``
+            training parameter.
+        """
+        kRows = 64
+        kCols = 32
+        # Draw the data from the module-level seeded generator so that a
+        # failing run can be reproduced (the global NumPy RNG is not seeded
+        # anywhere in this module).
+        X = rng.randn(kRows, kCols)
+        y = rng.randn(kRows)
+        dtrain = xgb.DMatrix(X, y)
+        params = {
+            "device": "sycl",
+            "max_depth": "2",
+            "gamma": "0.1",
+            "alpha": "0.01",
+            "enable_experimental_json_serialization": use_json,
+        }
+        # Reference model: all 64 boosting rounds in a single call.
+        bst_0 = xgb.train(params, dtrain, num_boost_round=64)
+        dump_0 = bst_0.get_dump(dump_format="json")
+
+        # Continued model: 32 rounds, then 32 more passed in via ``xgb_model``.
+        bst_1 = xgb.train(params, dtrain, num_boost_round=32)
+        bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
+        dump_1 = bst_1.get_dump(dump_format="json")
+
+        def recursive_compare(obj_0, obj_1):
+            """Assert that two parsed JSON dump fragments match.
+
+            Floats are compared with ``np.isclose(atol=1e-6)``; strings and
+            integers must be equal; dicts and sequences are compared
+            element by element.
+            """
+            if isinstance(obj_0, float):
+                assert np.isclose(obj_0, obj_1, atol=1e-6)
+            elif isinstance(obj_0, (str, int)):
+                assert obj_0 == obj_1
+            elif isinstance(obj_0, dict):
+                # Keys must agree in content and order; comparing the whole
+                # key lists also rejects extra keys on either side.
+                assert list(obj_0) == list(obj_1)
+                for key, value_0 in obj_0.items():
+                    # Values stored under "missing" are not compared.
+                    if key != "missing":
+                        recursive_compare(value_0, obj_1[key])
+            else:
+                # Without the length check, extra trailing elements in obj_1
+                # would go unnoticed.
+                assert len(obj_0) == len(obj_1)
+                for item_0, item_1 in zip(obj_0, obj_1):
+                    recursive_compare(item_0, item_1)
+
+        assert len(dump_0) == len(dump_1)
+        for tree_0, tree_1 in zip(dump_0, dump_1):
+            recursive_compare(json.loads(tree_0), json.loads(tree_1))
+
+    def test_sycl_training_continuation_binary(self):
+        """Run the continuation check with ``use_json=False``."""
+        self.run_training_continuation(False)
+
+    def test_sycl_training_continuation_json(self):
+        """Run the continuation check with ``use_json=True``."""
+        self.run_training_continuation(True)
diff --git a/tests/python-sycl/test_sycl_updaters.py b/tests/python-sycl/test_sycl_updaters.py
@@ -0,0 +1,80 @@
+import numpy as np
+import gc
+import pytest
+import xgboost as xgb
+from hypothesis import given, strategies, assume, settings, note
+
+import sys
+import os
+
+# sys.path.append("tests/python")
+# import testing as tm
+from xgboost import testing as tm
+
+def _has_growth_limit(candidate):
+    """Return True when the drawn parameters bound the tree growth.
+
+    A candidate is kept when ``max_depth`` is positive, or when ``max_leaves``
+    is positive and the grow policy is ``lossguide``.
+    """
+    if candidate["max_depth"] > 0:
+        return True
+    return candidate["max_leaves"] > 0 and candidate["grow_policy"] == "lossguide"
+
+
+# Hypothesis strategy producing training-parameter dictionaries; candidates
+# rejected by ``_has_growth_limit`` are filtered out.
+parameter_strategy = strategies.fixed_dictionaries(
+    {
+        "max_depth": strategies.integers(0, 11),
+        "max_leaves": strategies.integers(0, 256),
+        "max_bin": strategies.integers(2, 1024),
+        "grow_policy": strategies.sampled_from(["lossguide", "depthwise"]),
+        "single_precision_histogram": strategies.booleans(),
+        "min_child_weight": strategies.floats(0.5, 2.0),
+        "seed": strategies.integers(0, 10),
+        # We cannot enable subsampling as the training loss can increase
+        # 'subsample': strategies.floats(0.5, 1.0),
+        "colsample_bytree": strategies.floats(0.5, 1.0),
+        "colsample_bylevel": strategies.floats(0.5, 1.0),
+    }
+).filter(_has_growth_limit)
+
+
+def train_result(param, dmat, num_rounds):
+    """Train on ``dmat`` and return the recorded evaluation history.
+
+    Parameters
+    ----------
+    param :
+        Training parameters passed to ``xgb.train``.
+    dmat :
+        Training data; it is also used as the only evaluation set, under the
+        name ``"train"``.
+    num_rounds :
+        Number of boosting rounds.
+
+    Returns
+    -------
+    dict
+        The dictionary filled in by ``xgb.train`` through ``evals_result``.
+    """
+    result = {}
+    # ``evals`` is passed by keyword: supplying it positionally is deprecated
+    # in ``xgb.train``.
+    xgb.train(
+        param,
+        dmat,
+        num_boost_round=num_rounds,
+        evals=[(dmat, "train")],
+        verbose_eval=False,
+        evals_result=result,
+    )
+    return result
+
+
+class TestSYCLUpdaters:
+    """Property-based training tests that run on SYCL devices."""
+
+    @given(parameter_strategy, strategies.integers(1, 5), tm.make_dataset_strategy())
+    @settings(deadline=None)
+    def test_sycl_hist(self, param, num_rounds, dataset):
+        """Train with ``tree_method="hist"`` on ``device="sycl"``.
+
+        The training metric recorded for each round must be non-increasing.
+        """
+        param["tree_method"] = "hist"
+        param["device"] = "sycl"
+        param["verbosity"] = 0
+        param = dataset.set_params(param)
+        result = train_result(param, dataset.get_dmat(), num_rounds)
+        note(result)
+        assert tm.non_increasing(result["train"][dataset.metric])
+
+    @given(tm.make_dataset_strategy(), strategies.integers(0, 1))
+    @settings(deadline=None)
+    def test_specified_device_id_sycl_update(self, dataset, device_id):
+        """Train on the ``device_id``-th GPU entry reported by ``sycl-ls``.
+
+        If fewer than ``device_id + 1`` matching entries are listed, nothing
+        is trained and the test passes without asserting anything.
+        """
+        # Read the list of SYCL devices; the context manager closes the pipe.
+        with os.popen("sycl-ls") as pipe:
+            devices = pipe.read().split("\n")
+
+        # The test should launch only on a GPU.
+        # Find the GPUs in the list of devices
+        # and use the position in the list instead of device_id.
+        target_device_type = "opencl:gpu"
+        found_devices = 0
+        for idx, device in enumerate(devices):
+            # An entry matches when the text right after its first character
+            # is the target device type.
+            if not device.startswith(target_device_type, 1):
+                continue
+            if found_devices == device_id:
+                param = {"device": f"sycl:gpu:{idx}"}
+                param = dataset.set_params(param)
+                result = train_result(param, dataset.get_dmat(), 10)
+                assert tm.non_increasing(result["train"][dataset.metric])
+                # Stop after the requested GPU. The counter is not advanced
+                # on a hit, so continuing would train on every later GPU
+                # entry as well.
+                break
+            found_devices += 1
diff --git a/tests/python-sycl/test_sycl_with_sklearn.py b/tests/python-sycl/test_sycl_with_sklearn.py
@@ -0,0 +1,37 @@
+import xgboost as xgb
+import pytest
+import sys
+import numpy as np
+
+from xgboost import testing as tm
+
+sys.path.append("tests/python")
+import test_with_sklearn as twskl  # noqa
+
+# Module-wide skipif marker built from the keyword arguments returned by
+# tm.no_sklearn() (presumably skips when scikit-learn is missing - confirm).
+pytestmark = pytest.mark.skipif(**tm.no_sklearn())
+
+# Seeded RandomState; passed to KFold as random_state in the test below.
+rng = np.random.RandomState(1994)
+
+
+def test_sycl_binary_classification():
+    """Fit both classifier wrappers on two-class digits with ``device="sycl"``.
+
+    For every fold of a shuffled 2-fold split the error rate on the held-out
+    part must stay below 0.1.
+    """
+    from sklearn.datasets import load_digits
+    from sklearn.model_selection import KFold
+
+    digits = load_digits(n_class=2)
+    X, y = digits["data"], digits["target"]
+    folds = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for estimator_type in (xgb.XGBClassifier, xgb.XGBRFClassifier):
+        for train_index, test_index in folds.split(X, y):
+            model = estimator_type(random_state=42, device="sycl", n_estimators=4)
+            model = model.fit(X[train_index], y[train_index])
+            preds = model.predict(X[test_index])
+            labels = y[test_index]
+            # Count the rows whose thresholded prediction differs from the label.
+            mismatches = sum(
+                1 for pred, label in zip(preds, labels) if int(pred > 0.5) != label
+            )
+            err = mismatches / float(len(preds))
+            print(preds)
+            print(labels)
+            print(err)
+            assert err < 0.1