forked from dmlc/xgboost
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Dmitry Razdoburdin
committed
Jun 25, 2024
1 parent
59fd57b
commit 6de2c80
Showing
3 changed files
with
176 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import numpy as np | ||
import xgboost as xgb | ||
import json | ||
|
||
rng = np.random.RandomState(1994) | ||
|
||
|
||
class TestSYCLTrainingContinuation:
    """Check that resumed training on a SYCL device matches a single run."""

    def run_training_continuation(self, use_json):
        """Train 64 rounds at once and as 32 + 32 rounds, then compare dumps.

        Parameters
        ----------
        use_json
            Value forwarded as the ``enable_experimental_json_serialization``
            training parameter.

        Raises
        ------
        AssertionError
            If the JSON dumps of the two boosters differ in structure, or in
            any float value by more than ``atol=1e-6``.
        """
        kRows = 64
        kCols = 32
        # Draw from the seeded module-level generator so that a failing run
        # can be reproduced; the global ``np.random`` state is unseeded.
        X = rng.randn(kRows, kCols)
        y = rng.randn(kRows)
        dtrain = xgb.DMatrix(X, y)
        params = {
            "device": "sycl",
            "max_depth": "2",
            "gamma": "0.1",
            "alpha": "0.01",
            "enable_experimental_json_serialization": use_json,
        }
        bst_0 = xgb.train(params, dtrain, num_boost_round=64)
        dump_0 = bst_0.get_dump(dump_format="json")

        bst_1 = xgb.train(params, dtrain, num_boost_round=32)
        bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
        dump_1 = bst_1.get_dump(dump_format="json")

        def recursive_compare(obj_0, obj_1):
            """Assert that two decoded JSON values are equivalent."""
            if isinstance(obj_0, float):
                assert np.isclose(obj_0, obj_1, atol=1e-6)
            elif isinstance(obj_0, (str, int)):
                assert obj_0 == obj_1
            elif isinstance(obj_0, dict):
                # Same keys in the same order; this also rejects extra or
                # missing keys on either side.
                assert list(obj_0) == list(obj_1)
                for key, value_0 in obj_0.items():
                    # The value stored under "missing" is deliberately not
                    # compared.
                    if key != "missing":
                        recursive_compare(value_0, obj_1[key])
            else:
                # Anything else is treated as a sequence; zip alone would
                # silently ignore trailing items, so check the length first.
                assert len(obj_0) == len(obj_1)
                for item_0, item_1 in zip(obj_0, obj_1):
                    recursive_compare(item_0, item_1)

        assert len(dump_0) == len(dump_1)
        for tree_0, tree_1 in zip(dump_0, dump_1):
            recursive_compare(json.loads(tree_0), json.loads(tree_1))

    def test_sycl_training_continuation_binary(self):
        self.run_training_continuation(False)

    def test_sycl_training_continuation_json(self):
        self.run_training_continuation(True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import numpy as np | ||
import gc | ||
import pytest | ||
import xgboost as xgb | ||
from hypothesis import given, strategies, assume, settings, note | ||
|
||
import sys | ||
import os | ||
|
||
# sys.path.append("tests/python") | ||
# import testing as tm | ||
from xgboost import testing as tm | ||
|
||
def _has_growth_limit(config):
    """Return True when the drawn parameters put some bound on tree growth.

    Accepts a positive ``max_depth``, or otherwise a positive ``max_leaves``
    combined with the ``lossguide`` grow policy.
    """
    if config["max_depth"] > 0:
        return True
    return config["max_leaves"] > 0 and config["grow_policy"] == "lossguide"


# Hypothesis strategy that draws training-parameter dictionaries and discards
# the combinations rejected by the predicate above.
parameter_strategy = strategies.fixed_dictionaries(
    {
        "max_depth": strategies.integers(0, 11),
        "max_leaves": strategies.integers(0, 256),
        "max_bin": strategies.integers(2, 1024),
        "grow_policy": strategies.sampled_from(["lossguide", "depthwise"]),
        "single_precision_histogram": strategies.booleans(),
        "min_child_weight": strategies.floats(0.5, 2.0),
        "seed": strategies.integers(0, 10),
        # "subsample" is intentionally left out: with subsampling enabled the
        # training loss can increase, which breaks the non-increasing check.
        "colsample_bytree": strategies.floats(0.5, 1.0),
        "colsample_bylevel": strategies.floats(0.5, 1.0),
    }
).filter(_has_growth_limit)
|
||
|
||
def train_result(param, dmat, num_rounds):
    """Train a booster and return the evaluation history it recorded.

    Parameters
    ----------
    param
        Training parameters passed to ``xgb.train``.
    dmat
        Training data; it is also registered as the only evaluation set,
        under the name ``"train"``.
    num_rounds
        Number of boosting rounds.

    Returns
    -------
    dict
        The dictionary handed to ``xgb.train`` as ``evals_result``.
    """
    result = {}
    xgb.train(
        param,
        dmat,
        num_rounds,
        # ``evals`` is keyword-only in xgb.train; passing it positionally is
        # deprecated.
        evals=[(dmat, "train")],
        verbose_eval=False,
        evals_result=result,
    )
    return result
|
||
|
||
class TestSYCLUpdaters:
    """Property-based checks of tree updaters running on SYCL devices."""

    @given(parameter_strategy, strategies.integers(1, 5), tm.make_dataset_strategy())
    @settings(deadline=None)
    def test_sycl_hist(self, param, num_rounds, dataset):
        """The training metric must not increase with ``hist`` on ``sycl``."""
        param["tree_method"] = "hist"
        param["device"] = "sycl"
        param["verbosity"] = 0
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), num_rounds)
        note(result)
        assert tm.non_increasing(result["train"][dataset.metric])

    @given(tm.make_dataset_strategy(), strategies.integers(0, 1))
    @settings(deadline=None)
    def test_specified_device_id_sycl_update(self, dataset, device_id):
        """Train on the ``device_id``-th GPU listed by ``sycl-ls``.

        Nothing is trained (and the test passes) when ``sycl-ls`` lists
        fewer than ``device_id + 1`` GPU devices.
        """
        # Read the list of sycl devices; the context manager closes the pipe.
        with os.popen("sycl-ls") as pipe:
            devices = pipe.read().split("\n")

        # Test should launch only on gpu.
        # Find gpus in the list of devices
        # and use the id in the list instead of device_id.
        target_device_type = "opencl:gpu"
        # The device type is matched starting at the second character of each
        # line (presumably skipping an opening "[" -- confirm against the
        # sycl-ls output format).
        gpu_line_indices = [
            idx
            for idx, line in enumerate(devices)
            if line[1:].startswith(target_device_type)
        ]
        if device_id >= len(gpu_line_indices):
            return

        # Train exactly once, on the requested GPU. Previously the GPU counter
        # was not advanced after a match, so every later GPU line matched too.
        idx = gpu_line_indices[device_id]
        param = {"device": f"sycl:gpu:{idx}"}
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), 10)
        assert tm.non_increasing(result["train"][dataset.metric])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import xgboost as xgb | ||
import pytest | ||
import sys | ||
import numpy as np | ||
|
||
from xgboost import testing as tm | ||
|
||
sys.path.append("tests/python") | ||
import test_with_sklearn as twskl # noqa | ||
|
||
pytestmark = pytest.mark.skipif(**tm.no_sklearn()) | ||
|
||
rng = np.random.RandomState(1994) | ||
|
||
|
||
def test_sycl_binary_classification():
    """Fit both classifier flavours on two-class digits using ``device="sycl"``.

    For every fold of a shuffled 2-fold split, the share of held-out samples
    whose thresholded prediction differs from the label must stay below 0.1.
    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import KFold

    digits = load_digits(n_class=2)
    X, y = digits["data"], digits["target"]
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
        for train_index, test_index in kf.split(X, y):
            xgb_model = cls(random_state=42, device="sycl", n_estimators=4)
            xgb_model = xgb_model.fit(X[train_index], y[train_index])
            preds = xgb_model.predict(X[test_index])
            labels = y[test_index]
            # Count held-out samples whose thresholded prediction is wrong.
            mismatches = sum(
                1 for pred, label in zip(preds, labels) if int(pred > 0.5) != label
            )
            err = mismatches / float(len(preds))
            print(preds)
            print(labels)
            print(err)
            assert err < 0.1