Skip to content

Commit

Permalink
return sycl python tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitry Razdoburdin committed Jun 25, 2024
1 parent 59fd57b commit 6de2c80
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 0 deletions.
59 changes: 59 additions & 0 deletions tests/python-sycl/test_sycl_training_continuation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np
import xgboost as xgb
import json

# Module-level NumPy random generator created with the fixed seed 1994.
rng = np.random.RandomState(1994)


class TestSYCLTrainingContinuation:
    """Check that continued training on a SYCL device matches one-shot training."""

    def run_training_continuation(self, use_json):
        """Train 64 rounds at once and as 32 + 32 rounds, then compare the dumps.

        Parameters
        ----------
        use_json
            Forwarded unchanged as the ``enable_experimental_json_serialization``
            training parameter.

        Raises
        ------
        AssertionError
            If the two boosters produce a different number of trees, or if any
            pair of dumped trees differs in structure or (beyond ``atol=1e-6``)
            in a floating-point value.
        """
        kRows = 64
        kCols = 32
        # Draw the data from a locally seeded generator so that every run (and
        # both serialization variants) sees the same, reproducible inputs.
        data_rng = np.random.RandomState(1994)
        X = data_rng.randn(kRows, kCols)
        y = data_rng.randn(kRows)
        dtrain = xgb.DMatrix(X, y)
        params = {
            "device": "sycl",
            "max_depth": "2",
            "gamma": "0.1",
            "alpha": "0.01",
            "enable_experimental_json_serialization": use_json,
        }
        # Reference model: all 64 rounds in a single call.
        bst_0 = xgb.train(params, dtrain, num_boost_round=64)
        dump_0 = bst_0.get_dump(dump_format="json")

        # Continued model: 32 rounds, then 32 more on top of the first booster.
        bst_1 = xgb.train(params, dtrain, num_boost_round=32)
        bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
        dump_1 = bst_1.get_dump(dump_format="json")

        def recursive_compare(obj_0, obj_1):
            """Assert that two decoded JSON values are equivalent."""
            if isinstance(obj_0, float):
                assert np.isclose(obj_0, obj_1, atol=1e-6)
            elif isinstance(obj_0, (str, int)):
                assert obj_0 == obj_1
            elif isinstance(obj_0, dict):
                # Same keys in the same order; this also rejects extra keys on
                # either side, which a positional walk over obj_0 would miss.
                assert list(obj_0) == list(obj_1)
                for key, value_0 in obj_0.items():
                    # Values stored under "missing" are deliberately not
                    # compared; only the presence of the key is checked.
                    if key != "missing":
                        recursive_compare(value_0, obj_1[key])
            else:
                # Anything else is treated as a sequence (JSON array).
                assert len(obj_0) == len(obj_1)
                for item_0, item_1 in zip(obj_0, obj_1):
                    recursive_compare(item_0, item_1)

        assert len(dump_0) == len(dump_1)
        for tree_0, tree_1 in zip(dump_0, dump_1):
            recursive_compare(json.loads(tree_0), json.loads(tree_1))

    def test_sycl_training_continuation_binary(self):
        """Run the continuation check with ``use_json=False``."""
        self.run_training_continuation(False)

    def test_sycl_training_continuation_json(self):
        """Run the continuation check with ``use_json=True``."""
        self.run_training_continuation(True)
80 changes: 80 additions & 0 deletions tests/python-sycl/test_sycl_updaters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import numpy as np
import gc
import pytest
import xgboost as xgb
from hypothesis import given, strategies, assume, settings, note

import sys
import os

# sys.path.append("tests/python")
# import testing as tm
from xgboost import testing as tm

def _has_growth_limit(params):
    """Return True when the drawn parameters bound the tree growth.

    A positive ``max_depth`` is always accepted. With ``max_depth == 0`` the
    combination is accepted only if ``max_leaves`` is positive and the grow
    policy is ``"lossguide"``.
    """
    if params["max_depth"] > 0:
        return True
    return params["max_leaves"] > 0 and params["grow_policy"] == "lossguide"


# Search space for the hypothesis-driven updater tests; one strategy per
# training parameter.
_PARAMETER_SPACE = {
    "max_depth": strategies.integers(0, 11),
    "max_leaves": strategies.integers(0, 256),
    "max_bin": strategies.integers(2, 1024),
    "grow_policy": strategies.sampled_from(["lossguide", "depthwise"]),
    "single_precision_histogram": strategies.booleans(),
    "min_child_weight": strategies.floats(0.5, 2.0),
    "seed": strategies.integers(0, 10),
    # We cannot enable subsampling as the training loss can increase
    # 'subsample': strategies.floats(0.5, 1.0),
    "colsample_bytree": strategies.floats(0.5, 1.0),
    "colsample_bylevel": strategies.floats(0.5, 1.0),
}

parameter_strategy = strategies.fixed_dictionaries(_PARAMETER_SPACE).filter(
    _has_growth_limit
)


def train_result(param, dmat, num_rounds):
    """Train a booster and return the evaluation history on the training data.

    Parameters
    ----------
    param
        Training parameters passed to ``xgb.train``.
    dmat
        Training data; it is also registered as the only evaluation set,
        under the name ``"train"``.
    num_rounds
        Number of boosting rounds.

    Returns
    -------
    dict
        The dictionary filled in by ``xgb.train`` through ``evals_result``.
    """
    result = {}
    # Everything after the training data is passed by keyword: positional use
    # of ``evals`` is deprecated in recent XGBoost releases.
    xgb.train(
        param,
        dmat,
        num_boost_round=num_rounds,
        evals=[(dmat, "train")],
        verbose_eval=False,
        evals_result=result,
    )
    return result


class TestSYCLUpdaters:
    """Hypothesis-driven tests for tree updaters running on SYCL devices."""

    @given(parameter_strategy, strategies.integers(1, 5), tm.make_dataset_strategy())
    @settings(deadline=None)
    def test_sycl_hist(self, param, num_rounds, dataset):
        """The training metric must not increase with ``hist`` on ``sycl``."""
        param["tree_method"] = "hist"
        param["device"] = "sycl"
        param["verbosity"] = 0
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), num_rounds)
        # Attach the metric history to the hypothesis report of a failing example.
        note(result)
        assert tm.non_increasing(result["train"][dataset.metric])

    @given(tm.make_dataset_strategy(), strategies.integers(0, 1))
    @settings(deadline=None)
    def test_specified_device_id_sycl_update(self, dataset, device_id):
        """Train on the ``device_id``-th OpenCL GPU reported by ``sycl-ls``.

        If ``sycl-ls`` lists fewer than ``device_id + 1`` matching devices,
        nothing is trained and the test passes without assertions.
        """
        # Read the list of SYCL devices; the context manager closes the pipe.
        with os.popen("sycl-ls") as pipe:
            devices = pipe.read().split("\n")

        # The test should launch only on GPUs: find the GPUs in the list of
        # devices and use the position in that list instead of device_id.
        target_device_type = "opencl:gpu"
        found_devices = 0
        for idx, device in enumerate(devices):
            # The device type is expected right after the first character of
            # the line (presumably an opening bracket - confirm against the
            # sycl-ls output format in use).
            if device[1 : 1 + len(target_device_type)] != target_device_type:
                continue
            if found_devices == device_id:
                param = {"device": f"sycl:gpu:{idx}"}
                param = dataset.set_params(param)
                result = train_result(param, dataset.get_dmat(), 10)
                assert tm.non_increasing(result["train"][dataset.metric])
                # Stop here: without this, every later GPU line would match
                # as well and be trained on, because the counter only
                # advances on non-selected devices.
                break
            found_devices += 1
37 changes: 37 additions & 0 deletions tests/python-sycl/test_sycl_with_sklearn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import xgboost as xgb
import pytest
import sys
import numpy as np

from xgboost import testing as tm

sys.path.append("tests/python")
import test_with_sklearn as twskl # noqa

# Module-wide skip marker built from the keyword arguments returned by
# tm.no_sklearn() (presumably skips when scikit-learn is unavailable - confirm).
pytestmark = pytest.mark.skipif(**tm.no_sklearn())

# Generator with a fixed seed; passed as ``random_state`` to KFold below.
rng = np.random.RandomState(1994)


def test_sycl_binary_classification():
    """Fit both classifier flavours on two-class digits using the SYCL device.

    For every fold of a shuffled 2-fold split, the share of test predictions
    that disagree with the labels must stay below 10%.
    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import KFold

    digits = load_digits(n_class=2)
    X, y = digits["data"], digits["target"]
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
        for train_index, test_index in kf.split(X, y):
            xgb_model = cls(random_state=42, device="sycl", n_estimators=4)
            xgb_model.fit(X[train_index], y[train_index])

            preds = xgb_model.predict(X[test_index])
            labels = y[test_index]

            # Count the predictions whose thresholded value differs from the label.
            mismatches = 0
            for pred, label in zip(preds, labels):
                if int(pred > 0.5) != label:
                    mismatches += 1
            err = mismatches / float(len(preds))

            # Diagnostics, shown by pytest when the assertion below fails.
            print(preds)
            print(labels)
            print(err)
            assert err < 0.1

0 comments on commit 6de2c80

Please sign in to comment.